From 4974006ba4f9ed97fbbab20a539b560e5d4c42cc Mon Sep 17 00:00:00 2001 From: Richard Schneeman Date: Sat, 9 Sep 2023 10:53:06 -0500 Subject: [PATCH] Exploded layer proof of concept The current design of the Layer trait encourages developers to put lots of logic into their layer. One impact of this, is it forces stateful logging to jump through hoops that I would call less ergonomic https://github.com/heroku/libcnb.rs/pull/669. This proof of concept is a re-imagining of how we might want to interact with a layer using only the existing layer primitives from libcnb. It encourages operations to be performed outside of a layer, because it does not require the developer to implement Layer on their own structs. Instead it treats metadata as a primitive and makes "read" and "write" layer information their own discrete actions. At a high level the flow of interacting with a layer now becomes: - Read cached data (get LayerData) - Do stuff (modify path, log, etc.) - Write new data (write LayerResult) In this process I reimagined the flow for cache invalidation to fit within a `MetadataDiff` enum. This essentially encourages a set of "reasonable defaults" for equality based metadata, but does not restrict to a specific use cases. Worth noting that I've got an idea of a way to better handle metadata that cannot be deserialized, but it would be distracting from the primary purpose of this PoC. ## Known The code in `main.rs` is quite long and would likely be moved to its own function if we continued down this path. ## Discussion/Feedback The purpose of this PR is to get feedback on this approach, both high level (concept) and low level (implementation). --- Cargo.lock | 12 +- .../ruby/src/layers/ruby_install_layer.rs | 127 +++--------- buildpacks/ruby/src/main.rs | 108 ++++++++-- commons/Cargo.toml | 2 + commons/src/cache.rs | 4 + commons/src/cache/read_write_layer.rs | 195 ++++++++++++++++++ commons/src/layer.rs | 4 + commons/src/layer/get_layer_data.rs | 72 +++++++ commons/src/layer/set_layer_data.rs | 99 +++++++++ 9 files changed, 505 insertions(+), 118 deletions(-) create mode 100644 commons/src/cache/read_write_layer.rs create mode 100644 commons/src/layer/get_layer_data.rs create mode 100644 commons/src/layer/set_layer_data.rs diff --git a/Cargo.lock b/Cargo.lock index 82803bc4..57363a54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,7 @@ dependencies = [ "fs_extra", "glob", "indoc", + "itertools 0.11.0", "lazy_static", "libcnb", "libcnb-test", @@ -462,6 +463,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.9" @@ -1277,7 +1287,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a68ac9b870fb8707f12778b31fb122ec6cf1fc693af5be5fddaeb5341c0a7a4a" dependencies = [ "is_executable", - "itertools", + "itertools 0.10.5", "ordered-float", "rayon", "strsim", diff --git a/buildpacks/ruby/src/layers/ruby_install_layer.rs b/buildpacks/ruby/src/layers/ruby_install_layer.rs index 21e7f355..54a4f1b2 100644 --- a/buildpacks/ruby/src/layers/ruby_install_layer.rs +++ b/buildpacks/ruby/src/layers/ruby_install_layer.rs @@ -1,16 +1,10 @@ #[allow(clippy::wildcard_imports)] -use commons::output::{ - fmt::{self}, - section_log::*, -}; - -use crate::{RubyBuildpack, RubyBuildpackError}; +use crate::RubyBuildpackError; use commons::gemfile_lock::ResolvedRubyVersion; +use commons::output::section_log::SectionLogger; use flate2::read::GzDecoder; -use libcnb::build::BuildContext; use libcnb::data::buildpack::StackId; -use libcnb::data::layer_content_metadata::LayerTypes; -use libcnb::layer::{ExistingLayerStrategy, Layer, LayerData, LayerResult, LayerResultBuilder}; +use libcnb::layer::{LayerResult, LayerResultBuilder}; use serde::{Deserialize, Serialize}; use std::io; use std::path::Path; @@ -31,99 +25,40 @@ use url::Url; /// /// When the Ruby version changes, invalidate and re-run. /// -pub(crate) struct RubyInstallLayer<'a> { - pub _in_section: &'a dyn SectionLogger, // force the layer to be called within a Section logging context, not necessary but it's safer - pub metadata: RubyInstallLayerMetadata, -} -#[derive(Deserialize, Serialize, Debug, Clone)] +#[derive(Deserialize, Serialize, Debug, Clone, Eq, PartialEq)] pub(crate) struct RubyInstallLayerMetadata { pub stack: StackId, pub version: ResolvedRubyVersion, } -impl<'a> Layer for RubyInstallLayer<'a> { - type Buildpack = RubyBuildpack; - type Metadata = RubyInstallLayerMetadata; - - fn types(&self) -> LayerTypes { - LayerTypes { - build: true, - launch: true, - cache: true, - } - } - - fn create( - &self, - _context: &BuildContext, - layer_path: &Path, - ) -> Result, RubyBuildpackError> { - log_step_timed("Installing", || { - let tmp_ruby_tgz = NamedTempFile::new() - .map_err(RubyInstallError::CouldNotCreateDestinationFile) - .map_err(RubyBuildpackError::RubyInstallError)?; - - let url = download_url(&self.metadata.stack, &self.metadata.version) - .map_err(RubyBuildpackError::RubyInstallError)?; - - download(url.as_ref(), tmp_ruby_tgz.path()) - .map_err(RubyBuildpackError::RubyInstallError)?; - - untar(tmp_ruby_tgz.path(), layer_path).map_err(RubyBuildpackError::RubyInstallError)?; - - LayerResultBuilder::new(self.metadata.clone()).build() - }) - } - - fn existing_layer_strategy( - &self, - _context: &BuildContext, - layer_data: &LayerData, - ) -> Result { - let old = &layer_data.content_metadata.metadata; - let now = self.metadata.clone(); - - match cache_state(old.clone(), now) { - Changed::Nothing(_version) => { - log_step("Using cached version"); - - Ok(ExistingLayerStrategy::Keep) - } - Changed::Stack(_old, _now) => { - log_step(format!("Clearing cache {}", fmt::details("stack changed"))); - - Ok(ExistingLayerStrategy::Recreate) - } - Changed::RubyVersion(_old, _now) => { - log_step(format!( - "Clearing cache {}", - fmt::details("ruby version changed") - )); - - Ok(ExistingLayerStrategy::Recreate) - } - } - } -} - -fn cache_state(old: RubyInstallLayerMetadata, now: RubyInstallLayerMetadata) -> Changed { - let RubyInstallLayerMetadata { stack, version } = now; - - if old.stack != stack { - Changed::Stack(old.stack, stack) - } else if old.version != version { - Changed::RubyVersion(old.version, version) - } else { - Changed::Nothing(version) - } -} - -#[derive(Debug)] -enum Changed { - Nothing(ResolvedRubyVersion), - Stack(StackId, StackId), - RubyVersion(ResolvedRubyVersion, ResolvedRubyVersion), +pub(crate) fn install( + log: Box, + path: &Path, + metadata: RubyInstallLayerMetadata, +) -> Result< + ( + Box, + LayerResult, + ), + RubyBuildpackError, +> { + let timer = log.step_timed("Installing"); + let tmp_ruby_tgz = NamedTempFile::new() + .map_err(RubyInstallError::CouldNotCreateDestinationFile) + .map_err(RubyBuildpackError::RubyInstallError)?; + + let url = download_url(&metadata.stack, &metadata.version) + .map_err(RubyBuildpackError::RubyInstallError)?; + + download(url.as_ref(), tmp_ruby_tgz.path()).map_err(RubyBuildpackError::RubyInstallError)?; + + untar(tmp_ruby_tgz.path(), path).map_err(RubyBuildpackError::RubyInstallError)?; + let log = timer.finish_timed_step(); + + LayerResultBuilder::new(metadata) + .build() + .map(|result| (log, result)) } fn download_url(stack: &StackId, version: impl std::fmt::Display) -> Result { diff --git a/buildpacks/ruby/src/main.rs b/buildpacks/ruby/src/main.rs index 2dc19a57..2eebe6ff 100644 --- a/buildpacks/ruby/src/main.rs +++ b/buildpacks/ruby/src/main.rs @@ -1,7 +1,7 @@ #![warn(unused_crate_dependencies)] #![warn(clippy::pedantic)] #![allow(clippy::module_name_repetitions)] -use commons::cache::CacheError; +use commons::cache::{toml_delta, CacheError, CachedLayer, CachedLayerData, MetadataDiff}; use commons::fun_run::CmdError; use commons::gemfile_lock::GemfileLock; use commons::metadata_digest::MetadataDigest; @@ -10,8 +10,7 @@ use core::str::FromStr; use layers::{ bundle_download_layer::BundleDownloadLayer, bundle_download_layer::BundleDownloadLayerMetadata, bundle_install_layer::BundleInstallLayer, bundle_install_layer::BundleInstallLayerMetadata, - ruby_install_layer::RubyInstallError, ruby_install_layer::RubyInstallLayer, - ruby_install_layer::RubyInstallLayerMetadata, + ruby_install_layer::RubyInstallError, ruby_install_layer::RubyInstallLayerMetadata, }; use libcnb::build::{BuildContext, BuildResult, BuildResultBuilder}; use libcnb::data::build_plan::BuildPlanBuilder; @@ -19,6 +18,7 @@ use libcnb::data::launch::LaunchBuilder; use libcnb::data::layer_name; use libcnb::detect::{DetectContext, DetectResult, DetectResultBuilder}; use libcnb::generic::{GenericMetadata, GenericPlatform}; +use libcnb::layer::LayerResultBuilder; use libcnb::layer_env::Scope; use libcnb::Platform; use libcnb::{buildpack_main, Buildpack}; @@ -88,26 +88,81 @@ impl Buildpack for RubyBuildpack { let ruby_version = gemfile_lock.resolve_ruby("3.1.3"); // ## Install executable ruby version - (logger, env) = { - let section = logger.section(&format!( - "Ruby version {} from {}", - fmt::value(ruby_version.to_string()), - fmt::value(gemfile_lock.ruby_source()) + let metadata = RubyInstallLayerMetadata { + stack: context.stack_id.clone(), + version: ruby_version.clone(), + }; + + let log_section = logger.section(&format!( + "Ruby version {value} from {source}", + value = fmt::value(metadata.version.to_string()), + source = fmt::value(gemfile_lock.ruby_source()) )); - let ruby_layer = context // - .handle_layer( - layer_name!("ruby"), - RubyInstallLayer { - _in_section: section.as_ref(), - metadata: RubyInstallLayerMetadata { - stack: context.stack_id.clone(), - version: ruby_version.clone(), - }, - }, - )?; - let env = ruby_layer.env.apply(Scope::Build, &env); - (section.end_section(), env) + + let cached_layer = CachedLayer { + name: layer_name!("ruby"), + build: true, + launch: true, + metadata, + } + .read(&context)?; + + let (log_section, result) = match cached_layer.diff() { + MetadataDiff::Same(metadata) => ( + log_section.step("Using cache"), + LayerResultBuilder::new(metadata.clone()).build_unwrapped(), + ), + MetadataDiff::Different { old, now } => { + let reason = if old.version == now.version { + format!( + "{differences} changed", + differences = commons::display::SentenceList::new( + &toml_delta(&old, &now) + .into_iter() + .map(|diff| diff.key) + .collect::>() + ) + ) + } else { + format!( + "Ruby version changed from {old} to {now}", + old = fmt::value(old.version.to_string()), + now = fmt::value(now.version.to_string()) + ) + }; + + crate::layers::ruby_install_layer::install( + clear_cache_with_reason(log_section, &cached_layer, reason).unwrap(), + &cached_layer.path, + now.clone(), + )? + } + MetadataDiff::CannotDeserialize { old: _old, now } => { + crate::layers::ruby_install_layer::install( + clear_cache_with_reason( + log_section, + &cached_layer, + "cannot deserialize metadata", + ) + .unwrap(), + &cached_layer.path, + now.clone(), + )? + } + MetadataDiff::NoCache(metadata) => crate::layers::ruby_install_layer::install( + log_section, + &cached_layer.path, + metadata.clone(), + )?, + }; + + let env = cached_layer + .write(&context, result)? + .env + .apply(Scope::Build, &env); + + (log_section.end_section(), env) }; // ## Setup bundler @@ -212,6 +267,17 @@ fn needs_java(gemfile_lock: &str) -> bool { java_regex.is_match(gemfile_lock) } +fn clear_cache_with_reason( + log: Box, + layer_data: &CachedLayerData, + reason: impl AsRef, +) -> Result, std::io::Error> { + let log = log.step(&format!("Clearing cache ({})", reason.as_ref())); + layer_data.clear_path_contents()?; + + Ok(log) +} + #[derive(Debug)] pub(crate) enum RubyBuildpackError { RakeDetectError(CmdError), diff --git a/commons/Cargo.toml b/commons/Cargo.toml index ba4bc592..b25676c3 100644 --- a/commons/Cargo.toml +++ b/commons/Cargo.toml @@ -28,6 +28,8 @@ thiserror = "1" walkdir = "2" which_problem = "0.1" ascii_table = { version = "4.0.2", features = ["color_codes"] } +toml = "0.7" +itertools = "0.11.0" [dev-dependencies] indoc = "2.0.1" diff --git a/commons/src/cache.rs b/commons/src/cache.rs index f6480876..f72d5aa0 100644 --- a/commons/src/cache.rs +++ b/commons/src/cache.rs @@ -5,6 +5,7 @@ mod clean; mod config; mod error; mod in_app_dir_cache_layer; +mod read_write_layer; pub use self::app_cache::{build, PathState}; pub use self::app_cache::{AppCache, CacheState}; @@ -13,3 +14,6 @@ pub use self::clean::FilesWithSize; pub use self::config::CacheConfig; pub use self::config::{mib, KeepPath}; pub use self::error::CacheError; +pub use self::read_write_layer::{ + metadata_diff, toml_delta, CachedLayer, CachedLayerData, MetadataDiff, +}; diff --git a/commons/src/cache/read_write_layer.rs b/commons/src/cache/read_write_layer.rs new file mode 100644 index 00000000..a7588187 --- /dev/null +++ b/commons/src/cache/read_write_layer.rs @@ -0,0 +1,195 @@ +use itertools::Itertools; +use libcnb::build::BuildContext; +use libcnb::data::layer::LayerName; +use libcnb::data::layer_content_metadata::LayerTypes; +use libcnb::layer::LayerData; +use libcnb::layer::LayerResult; +use serde::de::DeserializeOwned; +use serde::Serialize; +use std::marker::PhantomData; +use std::path::PathBuf; + +use crate::layer::{GetLayerData, SetLayerData}; + +#[derive(Debug, Clone)] +pub enum MetadataDiff { + Same(M), + Different { + old: M, + now: M, + }, + CannotDeserialize { + old: Option, + now: M, + }, + NoCache(M), +} + +#[derive(Debug, Clone)] +pub struct TomlDelta { + pub key: String, + pub old: Option, + pub now: Option, +} + +/// Returns the delta between the two objects +pub fn toml_delta(old: &M, now: &M) -> Vec { + let old = toml::to_string(&old) + .ok() + .and_then(|string| string.parse::().ok()) + .unwrap_or_default(); + + let now = toml::to_string(&now) + .ok() + .and_then(|string| string.parse::().ok()) + .unwrap_or_default(); + + let mut diff = Vec::new(); + for key in old.keys().chain(now.keys()).unique() { + match (old.get(key).cloned(), now.get(key).cloned()) { + (old_value, now_value) if old_value != now_value => diff.push(TomlDelta { + key: key.clone(), + old: old_value.clone(), + now: now_value.clone(), + }), + _ => {} + } + } + + diff +} + +pub fn metadata_diff(raw_metadata: &Option, metadata: M) -> MetadataDiff +where + M: Serialize + DeserializeOwned + Eq + PartialEq + Clone, +{ + let cache_data = raw_metadata.clone(); + if let Some(cache) = cache_data.clone() { + let old: Result = cache.try_into(); + match &old { + Ok(old) => { + if old == &metadata { + MetadataDiff::Same(metadata) + } else { + MetadataDiff::Different { + old: old.clone(), + now: metadata.clone(), + } + } + } + Err(_) => MetadataDiff::CannotDeserialize { + old: cache_data, + now: metadata, + }, + } + } else { + MetadataDiff::NoCache(metadata) + } +} + +pub struct CachedLayer { + pub name: LayerName, + pub build: bool, + pub launch: bool, + pub metadata: M, +} + +impl CachedLayer +where + M: Serialize + DeserializeOwned + Eq + PartialEq + Clone, +{ + /// # Errors + /// + /// TODO + pub fn read( + &self, + context: &BuildContext, + ) -> Result, libcnb::Error> { + let (read_name, write_name) = (clone_name(&self.name), clone_name(&self.name)); + + let data = context // + .handle_layer( + read_name, + GetLayerData::new(LayerTypes { + cache: true, + launch: self.launch, + build: self.build, + }), + )?; + + let name = write_name; + let path = data.path.clone(); + let metadata_diff = metadata_diff(&data.content_metadata.metadata, self.metadata.clone()); + let buildpack = PhantomData::; + let layer_types = LayerTypes { + cache: true, + build: self.build, + launch: self.launch, + }; + + Ok(CachedLayerData { + name, + path, + layer_types, + metadata_diff, + buildpack, + }) + } +} + +fn clone_name(name: &LayerName) -> LayerName { + name.as_str() + .parse::() + .expect("Parsing a layer name from a layer name should be infailable") +} + +pub struct CachedLayerData { + pub name: LayerName, + pub path: PathBuf, + pub layer_types: LayerTypes, + pub metadata_diff: MetadataDiff, + + buildpack: PhantomData, +} + +impl CachedLayerData { + /// # Errors + /// + /// TODO + pub fn clear_path_contents(&self) -> Result<(), std::io::Error> { + // Ideally would return licnb::Error::HandleLayerError but the internal type not exposed + fs_err::remove_dir_all(&self.path)?; + fs_err::create_dir_all(&self.path) + } +} + +impl CachedLayerData +where + M: Serialize + DeserializeOwned + Eq + PartialEq + Clone, + B: libcnb::Buildpack, +{ + /// # Errors + /// + /// TODO + pub fn write( + &self, + context: &BuildContext, + layer_result: LayerResult, + ) -> Result, libcnb::Error> { + context.handle_layer( + clone_name(&self.name), + SetLayerData::new( + LayerTypes { + cache: self.layer_types.cache, + build: self.layer_types.build, + launch: self.layer_types.launch, + }, + layer_result, + ), + ) + } + + pub fn diff(&self) -> MetadataDiff { + self.metadata_diff.clone() + } +} diff --git a/commons/src/layer.rs b/commons/src/layer.rs index 74807ed9..cb4ce29c 100644 --- a/commons/src/layer.rs +++ b/commons/src/layer.rs @@ -1,6 +1,10 @@ #![allow(clippy::module_name_repetitions)] mod configure_env_layer; mod default_env_layer; +mod get_layer_data; +mod set_layer_data; pub use self::configure_env_layer::ConfigureEnvLayer; pub use self::default_env_layer::DefaultEnvLayer; +pub use self::get_layer_data::GetLayerData; +pub use self::set_layer_data::SetLayerData; diff --git a/commons/src/layer/get_layer_data.rs b/commons/src/layer/get_layer_data.rs new file mode 100644 index 00000000..1535ef2a --- /dev/null +++ b/commons/src/layer/get_layer_data.rs @@ -0,0 +1,72 @@ +use libcnb::build::BuildContext; +use libcnb::data::layer_content_metadata::LayerTypes; +use libcnb::generic::GenericMetadata; +use libcnb::layer::{ExistingLayerStrategy, Layer, LayerData, LayerResult, LayerResultBuilder}; +use libcnb::layer_env::LayerEnv; +use std::marker::PhantomData; +use std::path::Path; + +/// A struct with one purpose: Retrieve prior `LayerData` from the last build (if there is any) +#[derive(Debug)] +pub struct GetLayerData { + buildpack: PhantomData, + layer_types: LayerTypes, +} + +impl GetLayerData { + #[must_use] + pub fn new(layer_types: LayerTypes) -> Self { + Self { + buildpack: PhantomData, + layer_types, + } + } +} + +impl Layer for GetLayerData +where + B: libcnb::Buildpack, +{ + type Buildpack = B; + type Metadata = GenericMetadata; + + /// An unfortunate byproduct of this interface is that we have to write layer types when we read + /// cached layer data. + fn types(&self) -> LayerTypes { + LayerTypes { + launch: self.layer_types.launch, + build: self.layer_types.build, + cache: self.layer_types.cache, + } + } + + fn create( + &self, + _context: &BuildContext, + _layer_path: &Path, + ) -> Result, ::Error> { + LayerResultBuilder::new(GenericMetadata::default()) + .env(LayerEnv::new()) + .build() + } + + fn existing_layer_strategy( + &self, + _context: &BuildContext, + _layer_data: &LayerData, + ) -> Result::Error> { + Ok(ExistingLayerStrategy::Keep) + } + + fn migrate_incompatible_metadata( + &self, + _context: &BuildContext, + _metadata: &libcnb::generic::GenericMetadata, + ) -> Result< + libcnb::layer::MetadataMigration, + ::Error, + > { + eprint!("Warning: Clearing cache (Could not seriailize metadata from cache)"); + Ok(libcnb::layer::MetadataMigration::RecreateLayer) + } +} diff --git a/commons/src/layer/set_layer_data.rs b/commons/src/layer/set_layer_data.rs new file mode 100644 index 00000000..e0a812b9 --- /dev/null +++ b/commons/src/layer/set_layer_data.rs @@ -0,0 +1,99 @@ +use libcnb::build::BuildContext; +use libcnb::data::layer_content_metadata::LayerTypes; +use libcnb::generic::GenericMetadata; +use libcnb::layer::{ExistingLayerStrategy, Layer, LayerData, LayerResult}; +use libcnb::Buildpack; +use serde::de::DeserializeOwned; +use serde::Serialize; +use std::path::Path; + +// Does everything but modifies the disk +pub struct SetLayerData { + buildpack: std::marker::PhantomData, + layer_types: LayerTypes, + layer_result: LayerResult, +} + +impl SetLayerData { + pub fn new(layer_types: LayerTypes, layer_result: LayerResult) -> Self { + Self { + buildpack: std::marker::PhantomData, + layer_types, + layer_result, + } + } +} + +impl Layer for SetLayerData +where + B: Buildpack, + M: Serialize + DeserializeOwned + Clone, +{ + type Buildpack = B; + type Metadata = M; + + fn types(&self) -> LayerTypes { + LayerTypes { + launch: self.layer_types.launch, + build: self.layer_types.build, + cache: self.layer_types.cache, + } + } + + fn create( + &self, + _context: &BuildContext, + _layer_path: &Path, + ) -> Result, ::Error> { + let metadata = self.layer_result.metadata.clone(); + let env = self.layer_result.env.clone(); + let exec_d_programs = self.layer_result.exec_d_programs.clone(); + let sboms = self.layer_result.sboms.clone(); + + Ok(LayerResult { + metadata, + env, + exec_d_programs, + sboms, + }) + } + + fn update( + &self, + _context: &BuildContext, + _layer_data: &LayerData, + ) -> Result, ::Error> { + let metadata = self.layer_result.metadata.clone(); + let env = self.layer_result.env.clone(); + let exec_d_programs = self.layer_result.exec_d_programs.clone(); + let sboms = self.layer_result.sboms.clone(); + + Ok(LayerResult { + metadata, + env, + exec_d_programs, + sboms, + }) + } + + fn migrate_incompatible_metadata( + &self, + _context: &BuildContext, + _metadata: &GenericMetadata, + ) -> Result< + libcnb::layer::MetadataMigration, + ::Error, + > { + Ok(libcnb::layer::MetadataMigration::ReplaceMetadata( + self.layer_result.metadata.clone(), + )) + } + + fn existing_layer_strategy( + &self, + _context: &BuildContext, + _layer_data: &LayerData, + ) -> Result::Error> { + Ok(ExistingLayerStrategy::Keep) + } +}