Skip to content
This repository has been archived by the owner on Nov 7, 2024. It is now read-only.

Improving the encapsulation (chunking) algorithm #456

Merged
merged 1 commit into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
630 changes: 564 additions & 66 deletions lib/src/chunking.rs

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ async fn container_export(
..Default::default()
};
let pushed =
crate::container::encapsulate(repo, rev, &config, Some(opts), None, imgref).await?;
crate::container::encapsulate(repo, rev, &config, None, Some(opts), None, imgref).await?;
println!("{}", pushed);
Ok(())
}
Expand Down
48 changes: 40 additions & 8 deletions lib/src/container/encapsulate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! APIs for creating container images from OSTree commits

use super::ocidir::{Layer, OciDir};
use super::{ocidir, OstreeImageReference, Transport};
use super::{ocidir, OstreeImageReference, Transport, CONTENT_ANNOTATION};
use super::{ImageReference, SignatureSource, OSTREE_COMMIT_LABEL};
use crate::chunking::{Chunk, Chunking, ObjectMetaSized};
use crate::container::skopeo;
Expand Down Expand Up @@ -104,7 +104,7 @@ fn export_chunks(
ociw: &mut OciDir,
chunks: Vec<Chunk>,
opts: &ExportOpts,
) -> Result<Vec<(Layer, String)>> {
) -> Result<Vec<(Layer, String, Vec<String>)>> {
chunks
.into_iter()
.enumerate()
Expand All @@ -113,7 +113,7 @@ fn export_chunks(
ostree_tar::export_chunk(repo, commit, chunk.content, &mut w)
.with_context(|| format!("Exporting chunk {i}"))?;
let w = w.into_inner()?;
Ok((w.complete()?, chunk.name))
Ok((w.complete()?, chunk.name, chunk.packages))
})
.collect()
}
Expand Down Expand Up @@ -151,11 +151,20 @@ fn export_chunked(
.clone();

// Add the ostree layer
ociw.push_layer(manifest, imgcfg, ostree_layer, description);
ociw.push_layer(manifest, imgcfg, ostree_layer, description, None);
// Add the component/content layers
for (layer, name) in layers {
ociw.push_layer(manifest, imgcfg, layer, name.as_str());
for (layer, name, packages) in layers {
let mut annotation_component_layer = HashMap::new();
annotation_component_layer.insert(CONTENT_ANNOTATION.to_string(), packages.join(","));
ociw.push_layer(
manifest,
imgcfg,
layer,
name.as_str(),
Some(annotation_component_layer),
);
}

// This label (mentioned above) points to the last layer that is part of
// the ostree commit.
labels.insert(
Expand All @@ -167,13 +176,15 @@ fn export_chunked(

/// Generate an OCI image from a given ostree root
#[context("Building oci")]
#[allow(clippy::too_many_arguments)]
fn build_oci(
repo: &ostree::Repo,
rev: &str,
ocidir_path: &Path,
tag: Option<&str>,
config: &Config,
opts: ExportOpts,
prior_build: Option<&oci_image::ImageManifest>,
contentmeta: Option<crate::chunking::ObjectMetaSized>,
) -> Result<ImageReference> {
if !ocidir_path.exists() {
Expand Down Expand Up @@ -209,7 +220,15 @@ fn build_oci(
let mut manifest = ocidir::new_empty_manifest().build().unwrap();

let chunking = contentmeta
.map(|meta| crate::chunking::Chunking::from_mapping(repo, commit, meta, opts.max_layers))
.map(|meta| {
crate::chunking::Chunking::from_mapping(
repo,
commit,
meta,
&opts.max_layers,
prior_build,
)
})
.transpose()?;
// If no chunking was provided, create a logical single chunk.
let chunking = chunking
Expand Down Expand Up @@ -291,6 +310,7 @@ async fn build_impl(
repo: &ostree::Repo,
ostree_ref: &str,
config: &Config,
prior_build: Option<&oci_image::ImageManifest>,
opts: Option<ExportOpts>,
contentmeta: Option<ObjectMetaSized>,
dest: &ImageReference,
Expand All @@ -308,6 +328,7 @@ async fn build_impl(
tag,
config,
opts,
prior_build,
contentmeta,
)?;
None
Expand All @@ -323,6 +344,7 @@ async fn build_impl(
None,
config,
opts,
prior_build,
contentmeta,
)?;

Expand Down Expand Up @@ -377,9 +399,19 @@ pub async fn encapsulate<S: AsRef<str>>(
repo: &ostree::Repo,
ostree_ref: S,
config: &Config,
prior_build: Option<&oci_image::ImageManifest>,
opts: Option<ExportOpts>,
contentmeta: Option<ObjectMetaSized>,
dest: &ImageReference,
) -> Result<String> {
build_impl(repo, ostree_ref.as_ref(), config, opts, contentmeta, dest).await
build_impl(
repo,
ostree_ref.as_ref(),
config,
prior_build,
opts,
contentmeta,
dest,
)
.await
}
4 changes: 4 additions & 0 deletions lib/src/container/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ use std::str::FromStr;
/// The label injected into a container image that contains the ostree commit SHA-256.
pub const OSTREE_COMMIT_LABEL: &str = "ostree.commit";

/// The name of an annotation attached to a layer which names the packages/components
/// which are part of it.
pub(crate) const CONTENT_ANNOTATION: &str = "ostree.components";

/// Our generic catchall fatal error, expected to be converted
/// to a string to output to a terminal or logs.
type Result<T> = anyhow::Result<T>;
Expand Down
5 changes: 3 additions & 2 deletions lib/src/container/ocidir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ impl OciDir {
config: &mut oci_image::ImageConfiguration,
layer: Layer,
description: &str,
annotations: Option<HashMap<String, String>>,
) {
let annotations: Option<HashMap<String, String>> = None;
self.push_layer_annotated(manifest, config, layer, annotations, description);
}

Expand Down Expand Up @@ -531,7 +531,8 @@ mod tests {
let mut config = oci_image::ImageConfigurationBuilder::default()
.build()
.unwrap();
w.push_layer(&mut manifest, &mut config, root_layer, "root");
let annotations: Option<HashMap<String, String>> = None;
w.push_layer(&mut manifest, &mut config, root_layer, "root", annotations);
let config = w.write_config(config)?;
manifest.set_config(config);
w.replace_with_single_manifest(manifest.clone(), oci_image::Platform::default())?;
Expand Down
11 changes: 9 additions & 2 deletions lib/src/fixture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ d tmp
"## };
pub const CONTENTS_CHECKSUM_V0: &str =
"5e41de82f9f861fa51e53ce6dd640a260e4fb29b7657f5a3f14157e93d2c0659";
pub static CONTENTS_V0_LEN: Lazy<usize> = Lazy::new(|| OWNERS.len().checked_sub(1).unwrap());
// 1 for ostree commit, 2 for max frequency packages, 3 as empty layer
pub const LAYERS_V0_LEN: usize = 3usize;
pub const PKGS_V0_LEN: usize = 7usize;

#[derive(Debug, PartialEq, Eq)]
enum SeLabel {
Expand Down Expand Up @@ -317,6 +319,7 @@ fn build_mapping_recurse(
name: Rc::clone(&owner),
srcid: Rc::clone(&owner),
change_time_offset: u32::MAX,
change_frequency: u32::MAX,
});
}

Expand Down Expand Up @@ -661,11 +664,15 @@ impl Fixture {
let contentmeta = self.get_object_meta().context("Computing object meta")?;
let contentmeta = ObjectMetaSized::compute_sizes(self.srcrepo(), contentmeta)
.context("Computing sizes")?;
let opts = ExportOpts::default();
let opts = ExportOpts {
max_layers: std::num::NonZeroU32::new(PKGS_V0_LEN as u32),
..Default::default()
};
let digest = crate::container::encapsulate(
self.srcrepo(),
self.testref(),
&config,
None,
Some(opts),
Some(contentmeta),
&imgref,
Expand Down
Binary file modified lib/src/fixtures/fedora-coreos-contentmeta.json.gz
Binary file not shown.
1 change: 1 addition & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub mod objectsource;
pub(crate) mod objgv;
#[cfg(feature = "internal-testing-api")]
pub mod ostree_manual;
pub(crate) mod statistics;

mod utils;

Expand Down
6 changes: 3 additions & 3 deletions lib/src/objectsource.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,7 @@ pub struct ObjectSourceMeta {
/// Unique identifier, does not need to be human readable, but can be.
#[serde(with = "rcstr_serialize")]
pub identifier: ContentID,
/// Identifier for this source (e.g. package name-version, git repo).
/// Unlike the [`ContentID`], this should be human readable. It likely comes from an external source,
/// and may be re-serialized.
/// Just the name of the package (no version), needs to be human readable.
#[serde(with = "rcstr_serialize")]
pub name: Rc<str>,
/// Identifier for the *source* of this content; for example, if multiple binary
Expand All @@ -54,6 +52,8 @@ pub struct ObjectSourceMeta {
/// One suggested way to generate this number is to have it be in units of hours or days
/// since the earliest changed item.
pub change_time_offset: u32,
/// Change frequency
pub change_frequency: u32,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding the semver break label for this

}

impl PartialEq for ObjectSourceMeta {
Expand Down
109 changes: 109 additions & 0 deletions lib/src/statistics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//! This module implements a few basic statistical measures: mean, standard deviation, and median absolute deviation.

/// Returns the arithmetic mean of `data`, or `None` when `data` is empty.
pub(crate) fn mean(data: &[u64]) -> Option<f64> {
    if data.is_empty() {
        return None;
    }
    // Accumulate in `u128` so that the running total cannot overflow, even when
    // the slice holds many values close to `u64::MAX`.
    let total: u128 = data.iter().map(|&value| u128::from(value)).sum();
    Some(total as f64 / data.len() as f64)
}

/// Returns the population standard deviation of `data` (the variance is divided
/// by the number of samples), or `None` when `data` is empty.
pub(crate) fn std_deviation(data: &[u64]) -> Option<f64> {
    let count = data.len();
    if count == 0 {
        return None;
    }
    let data_mean = mean(data)?;

    // Squared distance of every sample from the mean.
    let squared_diffs = data.iter().map(|&value| {
        let diff = data_mean - (value as f64);
        diff * diff
    });
    let variance = squared_diffs.sum::<f64>() / count as f64;

    Some(variance.sqrt())
}

/// Computes the median and the median absolute deviation (MAD) of `data`.
///
/// Returns `None` for an empty slice, otherwise `Some((median, mad))`.
///
/// `data` must already be sorted: the median is read directly from the middle
/// position(s) and this function does not sort its input. The slice is never
/// modified; the mutable borrow is kept only so the signature stays unchanged.
pub(crate) fn median_absolute_deviation(data: &mut [u64]) -> Option<(f64, f64)> {
    if data.is_empty() {
        return None;
    }

    let mid = data.len() / 2;

    // Median of the (sorted) data.
    let median_data: f64 = if data.len() % 2 == 1 {
        data[mid] as f64
    } else {
        // Widen before adding so two large neighbours cannot overflow `u64`.
        0.5 * (u128::from(data[mid - 1]) + u128::from(data[mid])) as f64
    };

    // Absolute deviation of every sample from the median.
    let mut absolute_deviations: Vec<f64> = data
        .iter()
        .map(|&size| (size as f64 - median_data).abs())
        .collect();

    // Every deviation is finite and non-negative, so the comparison never fails.
    absolute_deviations.sort_by(|a, b| a.partial_cmp(b).expect("deviations are never NaN"));

    // The deviations vector has the same length as `data`, so `mid` is its middle too.
    let mad: f64 = if absolute_deviations.len() % 2 == 1 {
        absolute_deviations[mid]
    } else {
        0.5 * (absolute_deviations[mid - 1] + absolute_deviations[mid])
    };

    Some((median_data, mad))
}

/// Checks `mean` on empty, single-element, and small multi-element inputs.
#[test]
fn test_mean() {
    // No samples, no average.
    assert_eq!(mean(&[]), None);

    // A lone sample is its own mean.
    for single in [0u64, 1, 5, 100] {
        assert_eq!(mean(&[single]), Some(single as f64));
    }

    let cases: [(&[u64], f64); 3] = [
        (&[0, 1], 0.5),
        (&[0, 5, 100], 35.0),
        (&[7, 4, 30, 14], 13.75),
    ];
    for (input, expected) in cases {
        assert_eq!(mean(input), Some(expected));
    }
}

/// Checks `std_deviation` on empty, single-element, constant, and widely spread inputs.
#[test]
fn test_std_deviation() {
    assert_eq!(std_deviation(&[]), None);

    // A single sample has no spread.
    for single in [0u64, 1, 5, 100] {
        assert_eq!(std_deviation(&[single]), Some(0.0));
    }

    let cases: [(&[u64], f64); 3] = [
        (&[1, 4], 1.5),
        (&[2, 2, 2, 2], 0.0),
        (
            &[1, 20, 300, 4000, 50000, 600000, 7000000, 80000000],
            26193874.56387471,
        ),
    ];
    for (input, expected) in cases {
        assert_eq!(std_deviation(input), Some(expected));
    }
}

/// Checks `median_absolute_deviation` on empty, single-element, and multi-element
/// inputs. Every input is sorted, as the function requires.
#[test]
fn test_median_absolute_deviation() {
    assert_eq!(median_absolute_deviation(&mut []), None);
    for v in [0u64, 1, 5, 100] {
        assert_eq!(median_absolute_deviation(&mut [v]), Some((v as f64, 0.0)));
    }
    assert_eq!(median_absolute_deviation(&mut [1, 4]), Some((2.5, 1.5)));
    assert_eq!(
        median_absolute_deviation(&mut [2, 2, 2, 2]),
        Some((2.0, 0.0))
    );
    assert_eq!(
        median_absolute_deviation(&mut [
            1, 2, 3, 3, 4, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 9, 12, 52, 90
        ]),
        Some((6.0, 2.0))
    );

    // If more than half of the data has the same value, MAD = 0, thus any
    // value different from the residual median is classified as an outlier.
    assert_eq!(
        median_absolute_deviation(&mut [0, 0, 1, 1, 1, 1, 1, 1, 1]),
        Some((1.0, 0.0))
    );
}
Loading