Skip to content

Commit

Permalink
Fix handling of empty and singleton keys; set default back to 1
Browse files Browse the repository at this point in the history
  • Loading branch information
sffc committed Nov 1, 2023
1 parent b45b5fe commit 42cbfa3
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 68 deletions.
119 changes: 76 additions & 43 deletions provider/blob/src/blob_data_provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,61 +150,94 @@ mod test {

#[test]
fn test_empty() {
let mut blob: Vec<u8> = Vec::new();
let mut version = 1;

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));
while version <= 2 {
let mut blob: Vec<u8> = Vec::new();

exporter.flush(HelloWorldV1Marker::KEY).unwrap();
{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.close().unwrap();
}
if version == 1 {
exporter.set_v1();
} else {
exporter.set_v2();
}

exporter.flush(HelloWorldV1Marker::KEY).unwrap();

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();
exporter.close().unwrap();
}

assert!(matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));
let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(HelloWorldV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);

version += 1;
}
}

#[test]
fn test_singleton() {
let mut blob: Vec<u8> = Vec::new();
let mut version = 1;

{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));
while version <= 2 {
let mut blob: Vec<u8> = Vec::new();

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();
{
let mut exporter = BlobExporter::new_with_sink(Box::new(&mut blob));

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
if version == 1 {
exporter.set_v1();
} else {
exporter.set_v2();
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
));

assert!(matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
));

exporter.flush(HelloSingletonV1Marker::KEY).unwrap();

exporter.close().unwrap();
}

let provider = BlobDataProvider::try_new_from_blob(blob.into()).unwrap();

assert!(
matches!(
provider.load_buffer(
HelloSingletonV1Marker::KEY,
DataRequest {
locale: &icu_locid::locale!("de").into(),
metadata: Default::default()
}
),
Err(DataError {
kind: DataErrorKind::ExtraneousLocale,
..
})
),
"(version: {version})"
);

assert!(
matches!(
provider.load_buffer(HelloSingletonV1Marker::KEY, Default::default()),
Err(DataError {
kind: DataErrorKind::MissingLocale,
..
})
),
"(version: {version})"
);

version += 1;
}
}
}
5 changes: 4 additions & 1 deletion provider/blob/src/blob_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ impl<'data> BlobSchemaV2<'data> {
.binary_search(&key.hashed())
.ok()
.ok_or_else(|| DataErrorKind::MissingDataKey.with_req(key, req))?;
if key.metadata().singleton && !req.locale.is_empty() {
return Err(DataErrorKind::ExtraneousLocale.with_req(key, req));
}
let zerotrie = self
.locales
.get(idx0)
Expand All @@ -181,7 +184,7 @@ impl<'data> BlobSchemaV2<'data> {
debug_assert_eq!(self.keys.len(), self.locales.len());
// Note: We could check that every index occurs at least once, but that's a more expensive
// operation, so we will just check for the min and max index.
let mut seen_min = false;
let mut seen_min = self.buffers.is_empty();
let mut seen_max = self.buffers.is_empty();
for zerotrie in self.locales.iter() {
for (_locale, idx) in ZeroTrieSimpleAscii::from_store(zerotrie).iter() {
Expand Down
48 changes: 24 additions & 24 deletions provider/blob/src/export/blob_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
use crate::blob_schema::*;
use icu_provider::datagen::*;
use icu_provider::prelude::*;
use std::collections::{BTreeMap, HashMap};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::sync::Mutex;
use writeable::Writeable;
use zerotrie::ZeroTrieSimpleAscii;
Expand All @@ -32,7 +32,7 @@ pub struct BlobExporter<'w> {
#[allow(clippy::type_complexity)]
resources: Mutex<BTreeMap<DataKeyHash, BTreeMap<Vec<u8>, usize>>>,
// All seen keys
all_keys: Mutex<Vec<DataKeyHash>>,
all_keys: Mutex<BTreeSet<DataKeyHash>>,
/// Map from blob to blob ID
unique_resources: Mutex<HashMap<Vec<u8>, usize>>,
sink: Box<dyn std::io::Write + Sync + 'w>,
Expand All @@ -56,9 +56,9 @@ impl<'w> BlobExporter<'w> {
Self {
resources: Mutex::new(BTreeMap::new()),
unique_resources: Mutex::new(HashMap::new()),
all_keys: Mutex::new(Vec::new()),
all_keys: Mutex::new(BTreeSet::new()),
sink,
version: VersionConfig::V002,
version: VersionConfig::V001,
}
}

Expand Down Expand Up @@ -105,7 +105,7 @@ impl DataExporter for BlobExporter<'_> {
}

fn flush(&self, key: DataKey) -> Result<(), DataError> {
self.all_keys.lock().expect("poison").push(key.hashed());
self.all_keys.lock().expect("poison").insert(key.hashed());
Ok(())
}

Expand Down Expand Up @@ -199,26 +199,26 @@ impl BlobExporter<'_> {
fn close_v2(&mut self) -> Result<(), DataError> {
let FinalizedBuffers { vzv, remap } = self.finalize_buffers();

let keys: ZeroVec<DataKeyHash> = self
.resources
.lock()
.expect("poison")
.keys()
.copied()
.collect();
let all_keys = self.all_keys.lock().expect("poison");
let resources = self.resources.lock().expect("poison");

let locales_vec: Vec<Vec<u8>> = self
.resources
.lock()
.expect("poison")
.values()
.map(|sub_map| {
let mut sub_map = sub_map.clone();
sub_map
.iter_mut()
.for_each(|(_, id)| *id = *remap.get(id).expect("in-bound index"));
let zerotrie = ZeroTrieSimpleAscii::try_from(&sub_map).expect("in-bounds");
zerotrie.take_store()
let keys: ZeroVec<DataKeyHash> = all_keys.iter().copied().collect();

let locales_vec: Vec<Vec<u8>> = all_keys
.iter()
.map(|data_key_hash| resources.get(data_key_hash))
.map(|option_sub_map| {
if let Some(sub_map) = option_sub_map {
let mut sub_map = sub_map.clone();
sub_map
.iter_mut()
.for_each(|(_, id)| *id = *remap.get(id).expect("in-bound index"));
let zerotrie = ZeroTrieSimpleAscii::try_from(&sub_map).expect("in-bounds");
zerotrie.take_store()
} else {
// Key with no locales: insert an empty ZeroTrie
ZeroTrieSimpleAscii::default().take_store()
}
})
.collect();

Expand Down

0 comments on commit 42cbfa3

Please sign in to comment.