diff --git a/utils/zerotrie/benches/overview.rs b/utils/zerotrie/benches/overview.rs index 832274378fa..7990a89eb42 100644 --- a/utils/zerotrie/benches/overview.rs +++ b/utils/zerotrie/benches/overview.rs @@ -13,8 +13,10 @@ use zerotrie::ZeroTrieSimpleAscii; use zerovec::ZeroHashMap; #[cfg(feature = "bench")] use zerovec::ZeroMap; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("../tests/data/data.rs"); } @@ -137,7 +139,7 @@ fn get_subtags_bench_large(c: &mut Criterion) { fn get_subtags_bench_helper( mut g: criterion::BenchmarkGroup, strings: &[&str], - litemap: LiteMap<&[u8], usize>, + litemap: LiteMap<&ByteStr, usize>, ) { g.bench_function("SimpleAscii", |b| { let trie = ZeroTrieSimpleAscii::try_from(&litemap).unwrap(); @@ -171,7 +173,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroMap/usize", |b| { - let zm: ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zm: ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zm).get_copied(key.as_bytes()); @@ -182,7 +184,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroMap/u8", |b| { - let zm: ZeroMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect(); + let zm: ZeroMap<[u8], u8> = litemap.iter().map(|(k, v)| (k.as_bytes(), *v as u8)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zm).get_copied(key.as_bytes()); @@ -193,7 +195,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("HashMap", |b| { - let hm: HashMap<&[u8], usize> = litemap.iter().map(|(a, b)| (*a, *b)).collect(); + let hm: HashMap<&[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&hm).get(key.as_bytes()); @@ -206,7 +208,7 @@ fn get_subtags_bench_helper( g.bench_function("ZeroHashMap/usize", |b| { let zhm: ZeroHashMap<[u8], usize> = litemap .iter() - .map(|(a, b)| (*a, b)) + .map(|(a, b)| (a.as_bytes(), b)) .collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { @@ -220,7 +222,7 @@ fn get_subtags_bench_helper( #[cfg(feature = "bench")] g.bench_function("ZeroHashMap/u8", |b| { - let zhm: ZeroHashMap<[u8], u8> = litemap.iter().map(|(k, v)| (*k, *v as u8)).collect(); + let zhm: ZeroHashMap<[u8], u8> = litemap.iter().map(|(k, v)| (k.as_bytes(), *v as u8)).collect(); b.iter(|| { for (i, key) in black_box(strings).iter().enumerate() { let actual = black_box(&zhm).get(key.as_bytes()).copied(); diff --git a/utils/zerotrie/examples/first_weekday_for_region.rs b/utils/zerotrie/examples/first_weekday_for_region.rs index 25a025b7c04..f624f41faad 100644 --- a/utils/zerotrie/examples/first_weekday_for_region.rs +++ b/utils/zerotrie/examples/first_weekday_for_region.rs @@ -20,7 +20,6 @@ mod weekday { // This data originated from CLDR 41. static DATA: &[(&str, usize)] = &[ - ("001", weekday::MON), ("AD", weekday::MON), ("AE", weekday::SAT), ("AF", weekday::SAT), @@ -129,6 +128,7 @@ static DATA: &[(&str, usize)] = &[ ("NP", weekday::SUN), ("NZ", weekday::MON), ("OM", weekday::SAT), + ("001", weekday::MON), ("PA", weekday::SUN), ("PE", weekday::SUN), ("PH", weekday::SUN), diff --git a/utils/zerotrie/src/builder/bytestr.rs b/utils/zerotrie/src/builder/bytestr.rs index 9910efd7ffd..80ba4c3e256 100644 --- a/utils/zerotrie/src/builder/bytestr.rs +++ b/utils/zerotrie/src/builder/bytestr.rs @@ -2,62 +2,78 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use core::borrow::Borrow; +use crate::comparison; +use core::cmp::Ordering; +use core::fmt; #[cfg(feature = "serde")] use alloc::boxed::Box; -/// A struct transparent over `[u8]` with convenient helper functions. +/// A string key in a ZeroTrie. +/// +/// This type has a custom Ord impl, making it suitable for use in a sorted +/// map for ZeroTrie construction. #[repr(transparent)] -#[derive(PartialEq, Eq, PartialOrd, Ord)] -pub(crate) struct ByteStr([u8]); +#[derive(PartialEq, Eq)] +pub struct ByteStr([u8]); impl ByteStr { - pub const fn from_byte_slice_with_value<'a, 'l>( + #[inline] + pub(crate) const fn from_byte_slice_with_value<'a, 'l>( input: &'l [(&'a [u8], usize)], ) -> &'l [(&'a ByteStr, usize)] { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - pub const fn from_str_slice_with_value<'a, 'l>( + #[inline] + pub(crate) const fn from_str_slice_with_value<'a, 'l>( input: &'l [(&'a str, usize)], ) -> &'l [(&'a ByteStr, usize)] { // Safety: str and ByteStr have the same layout, and ByteStr is less restrictive unsafe { core::mem::transmute(input) } } - pub fn from_bytes(input: &[u8]) -> &Self { + /// Casts a `&[u8]` to a `&ByteStr` + #[inline] + pub const fn from_bytes(input: &[u8]) -> &Self { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - #[cfg(feature = "serde")] - pub fn from_boxed_bytes(input: Box<[u8]>) -> Box { + /// Casts a `Box<[u8]>` to a `Box` + #[cfg(feature = "alloc")] + pub const fn from_boxed_bytes(input: Box<[u8]>) -> Box { // Safety: [u8] and ByteStr have the same layout and invariants unsafe { core::mem::transmute(input) } } - #[allow(dead_code)] // may want this in the future - pub fn from_str(input: &str) -> &Self { + /// Casts a `&str` to a `&ByteStr` + pub const fn from_str(input: &str) -> &Self { Self::from_bytes(input.as_bytes()) } - #[allow(dead_code)] // may want this in the future - pub fn empty() -> &'static Self { + /// Creates an empty ByteStr + pub const fn empty() -> &'static Self { Self::from_bytes(&[]) } - #[allow(dead_code)] // not used in all features + /// Returns this ByteStr as a byte slice pub const fn as_bytes(&self) -> &[u8] { &self.0 } + /// Whether the ByteStr is an empty slice + pub const fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// How many bytes are in the ByteStr pub const fn len(&self) -> usize { self.0.len() } - #[allow(dead_code)] // not used in all features + /// Whether the ByteStr is all ASCII-range pub fn is_all_ascii(&self) -> bool { for byte in self.0.iter() { if !byte.is_ascii() { @@ -78,13 +94,15 @@ impl ByteStr { } /// Const function to evaluate `self < other`. - pub(crate) const fn is_less_then(&self, other: &Self) -> bool { + pub(crate) const fn is_less_than(&self, other: &Self) -> bool { let mut i = 0; while i < self.len() && i < other.len() { - if self.0[i] < other.0[i] { + let a = comparison::shift(self.0[i]); + let b = comparison::shift(other.0[i]); + if a < b { return true; } - if self.0[i] > other.0[i] { + if a > b { return false; } i += 1; @@ -107,15 +125,47 @@ impl ByteStr { } } -impl Borrow<[u8]> for ByteStr { - fn borrow(&self) -> &[u8] { +// Note: Does NOT impl Borrow<[u8]> because the Ord impls differ. +// AsRef is okay to implement. + +impl AsRef<[u8]> for ByteStr { + fn as_ref(&self) -> &[u8] { self.as_bytes() } } -#[cfg(feature = "alloc")] -impl Borrow<[u8]> for alloc::boxed::Box { - fn borrow(&self) -> &[u8] { - self.as_bytes() +impl AsRef for ByteStr { + fn as_ref(&self) -> &ByteStr { + self + } +} + +impl<'a> From<&'a str> for &'a ByteStr { + fn from(other: &'a str) -> Self { + ByteStr::from_str(other) + } +} + +impl fmt::Debug for ByteStr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + if let Ok(s) = core::str::from_utf8(self.as_bytes()) { + write!(f, "{s}") + } else { + write!(f, "{:?}", self.as_bytes()) + } + } +} + +impl Ord for ByteStr { + #[inline] + fn cmp(&self, other: &Self) -> Ordering { + crate::comparison::cmp_slices(&self.0, &other.0) + } +} + +impl PartialOrd for ByteStr { + #[inline] + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } diff --git a/utils/zerotrie/src/builder/konst/builder.rs b/utils/zerotrie/src/builder/konst/builder.rs index 4e7132a0821..b23a2950539 100644 --- a/utils/zerotrie/src/builder/konst/builder.rs +++ b/utils/zerotrie/src/builder/konst/builder.rs @@ -117,7 +117,7 @@ impl ZeroTrieBuilderConst { match prev { None => (), Some(prev) => { - if !prev.is_less_then(ascii_str) { + if !prev.is_less_than(ascii_str) { panic!("Strings in ByteStr constructor are not sorted"); } } diff --git a/utils/zerotrie/src/builder/litemap.rs b/utils/zerotrie/src/builder/litemap.rs index 6577b69357e..8b5d698b533 100644 --- a/utils/zerotrie/src/builder/litemap.rs +++ b/utils/zerotrie/src/builder/litemap.rs @@ -15,14 +15,13 @@ use litemap::LiteMap; impl ZeroTrieSimpleAscii> { #[doc(hidden)] - pub fn try_from_litemap_with_const_builder<'a, S>( - items: &LiteMap<&'a [u8], usize, S>, + pub fn try_from_litemap_with_const_builder<'a, 'b, S>( + items: &'a LiteMap<&'b ByteStr, usize, S>, ) -> Result where - S: litemap::store::StoreSlice<&'a [u8], usize, Slice = [(&'a [u8], usize)]>, + S: litemap::store::StoreSlice<&'b ByteStr, usize, Slice = [(&'b ByteStr, usize)]>, { - let tuples = items.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(tuples); + let byte_str_slice = items.as_slice(); ZeroTrieBuilderConst::<10000>::from_sorted_const_tuple_slice::<100>(byte_str_slice.into()) .map(|s| Self { store: s.as_bytes().to_vec(), @@ -30,18 +29,17 @@ impl ZeroTrieSimpleAscii> { } } -impl<'a, K, S> TryFrom<&'a LiteMap> for ZeroTrie> +impl<'a, 'b, K, S> TryFrom<&'a LiteMap> for ZeroTrie> where // Borrow, not AsRef, because we rely on Ord being the same. Unfortunately // this means `LiteMap<&str, usize>` does not work. - K: Borrow<[u8]>, + K: Borrow, S: litemap::store::StoreSlice, { type Error = ZeroTrieBuildError; fn try_from(items: &LiteMap) -> Result { - let byte_litemap = items.to_borrowed_keys::<[u8], Vec<_>>(); - let byte_slice = byte_litemap.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(byte_slice); + let byte_litemap = items.to_borrowed_keys::>(); + let byte_str_slice = byte_litemap.as_slice(); Self::try_from_tuple_slice(byte_str_slice) } } diff --git a/utils/zerotrie/src/builder/mod.rs b/utils/zerotrie/src/builder/mod.rs index 8086cc14dbd..1e3830bc46a 100644 --- a/utils/zerotrie/src/builder/mod.rs +++ b/utils/zerotrie/src/builder/mod.rs @@ -152,7 +152,7 @@ mod litemap; #[cfg(feature = "alloc")] pub(crate) mod nonconst; -use bytestr::ByteStr; +pub use bytestr::ByteStr; use super::ZeroTrieSimpleAscii; diff --git a/utils/zerotrie/src/builder/nonconst/builder.rs b/utils/zerotrie/src/builder/nonconst/builder.rs index 02dd062f82f..5ccd58dde63 100644 --- a/utils/zerotrie/src/builder/nonconst/builder.rs +++ b/utils/zerotrie/src/builder/nonconst/builder.rs @@ -12,7 +12,6 @@ use crate::byte_phf::PerfectByteHashMapCacheOwned; use crate::error::ZeroTrieBuildError; use crate::options::*; use crate::varint; -use alloc::borrow::Cow; use alloc::vec::Vec; /// A low-level builder for ZeroTrie. Supports all options. @@ -101,12 +100,11 @@ impl ZeroTrieBuilder { let items = Vec::<(K, usize)>::from_iter(iter); let mut items = items .iter() - .map(|(k, v)| (k.as_ref(), *v)) - .collect::>(); - items.sort_by(|a, b| cmp_keys_values(&options, *a, *b)); - let ascii_str_slice = items.as_slice(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(ascii_str_slice); - Self::from_sorted_tuple_slice_impl(byte_str_slice, options) + .map(|(k, v)| (ByteStr::from_bytes(k.as_ref()), *v)) + .collect::>(); + items.sort_by(|a, b| cmp_keys_values(*a, *b)); + let byte_str_slice = items.as_slice(); + Self::from_sorted_tuple_slice(byte_str_slice, options) } /// Builds a ZeroTrie with the given items and options. Assumes that the items are sorted, @@ -118,29 +116,16 @@ impl ZeroTrieBuilder { pub fn from_sorted_tuple_slice( items: &[(&ByteStr, usize)], options: ZeroTrieBuilderOptions, - ) -> Result { - let mut items = Cow::Borrowed(items); - if matches!(options.case_sensitivity, CaseSensitivity::IgnoreCase) { - // We need to re-sort the items with our custom comparator. - items.to_mut().sort_by(|a, b| { - cmp_keys_values(&options, (a.0.as_bytes(), a.1), (b.0.as_bytes(), b.1)) - }); - } - Self::from_sorted_tuple_slice_impl(&items, options) - } - - /// Internal constructor that does not re-sort the items. - fn from_sorted_tuple_slice_impl( - items: &[(&ByteStr, usize)], - options: ZeroTrieBuilderOptions, ) -> Result { for ab in items.windows(2) { - debug_assert!(cmp_keys_values( - &options, - (ab[0].0.as_bytes(), ab[0].1), - (ab[1].0.as_bytes(), ab[1].1) - ) - .is_lt()); + debug_assert!( + cmp_keys_values( + (&ab[0].0, ab[0].1), + (&ab[1].0, ab[1].1) + ) + .is_lt(), + "{ab:?}" + ); } let mut result = Self { data: S::atbs_new_empty(), @@ -403,16 +388,8 @@ impl ZeroTrieBuilder { } fn cmp_keys_values( - options: &ZeroTrieBuilderOptions, - a: (&[u8], usize), - b: (&[u8], usize), + a: (&ByteStr, usize), + b: (&ByteStr, usize), ) -> Ordering { - if matches!(options.case_sensitivity, CaseSensitivity::Sensitive) { - a.0.cmp(b.0) - } else { - let a_iter = a.0.iter().map(|x| x.to_ascii_lowercase()); - let b_iter = b.0.iter().map(|x| x.to_ascii_lowercase()); - Iterator::cmp(a_iter, b_iter) - } - .then_with(|| a.1.cmp(&b.1)) + a.0.cmp(b.0).then_with(|| a.1.cmp(&b.1)) } diff --git a/utils/zerotrie/src/comparison.rs b/utils/zerotrie/src/comparison.rs new file mode 100644 index 00000000000..debdf2bc4b5 --- /dev/null +++ b/utils/zerotrie/src/comparison.rs @@ -0,0 +1,60 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! A comparator that keeps lowercase and uppercase ASCII letters adjacent. + +use core::cmp::Ordering; + +#[inline] +pub(crate) const fn shift(x: u8) -> u8 { + (x << 3) | (x >> 5) +} + +#[inline] +pub(crate) fn cmp(a: u8, b: u8) -> Ordering { + shift(a).cmp(&shift(b)) +} + +#[inline] +pub(crate) fn cmpi(a: u8, b: u8) -> Ordering { + shift(a.to_ascii_lowercase()).cmp(&shift(b.to_ascii_lowercase())) +} + +#[inline] +pub(crate) fn cmp_slices(a: &[u8], b: &[u8]) -> Ordering { + let a_iter = a.iter().copied().map(shift); + let b_iter = b.iter().copied().map(shift); + Iterator::cmp(a_iter, b_iter) +} + +#[test] +fn test_basic_cmp() { + let mut all_bytes = (0u8..=255u8).collect::>(); + all_bytes.sort_by(|a, b| cmp(*a, *b)); + + assert_eq!(cmp(b'A', b'a'), Ordering::Less); + assert_eq!(cmp(b'B', b'b'), Ordering::Less); + assert_eq!(cmp(b'a', b'B'), Ordering::Less); + + assert_eq!(cmpi(b'A', b'a'), Ordering::Equal); + assert_eq!(cmpi(b'B', b'b'), Ordering::Equal); + assert_eq!(cmpi(b'a', b'B'), Ordering::Less); + + let full_order = [ + 0, 32, 64, 96, 128, 160, 192, 224, 1, 33, 65, 97, 129, 161, 193, 225, 2, 34, 66, 98, 130, + 162, 194, 226, 3, 35, 67, 99, 131, 163, 195, 227, 4, 36, 68, 100, 132, 164, 196, 228, 5, + 37, 69, 101, 133, 165, 197, 229, 6, 38, 70, 102, 134, 166, 198, 230, 7, 39, 71, 103, 135, + 167, 199, 231, 8, 40, 72, 104, 136, 168, 200, 232, 9, 41, 73, 105, 137, 169, 201, 233, 10, + 42, 74, 106, 138, 170, 202, 234, 11, 43, 75, 107, 139, 171, 203, 235, 12, 44, 76, 108, 140, + 172, 204, 236, 13, 45, 77, 109, 141, 173, 205, 237, 14, 46, 78, 110, 142, 174, 206, 238, + 15, 47, 79, 111, 143, 175, 207, 239, 16, 48, 80, 112, 144, 176, 208, 240, 17, 49, 81, 113, + 145, 177, 209, 241, 18, 50, 82, 114, 146, 178, 210, 242, 19, 51, 83, 115, 147, 179, 211, + 243, 20, 52, 84, 116, 148, 180, 212, 244, 21, 53, 85, 117, 149, 181, 213, 245, 22, 54, 86, + 118, 150, 182, 214, 246, 23, 55, 87, 119, 151, 183, 215, 247, 24, 56, 88, 120, 152, 184, + 216, 248, 25, 57, 89, 121, 153, 185, 217, 249, 26, 58, 90, 122, 154, 186, 218, 250, 27, 59, + 91, 123, 155, 187, 219, 251, 28, 60, 92, 124, 156, 188, 220, 252, 29, 61, 93, 125, 157, + 189, 221, 253, 30, 62, 94, 126, 158, 190, 222, 254, 31, 63, 95, 127, 159, 191, 223, 255, + ]; + assert_eq!(all_bytes, full_order); +} diff --git a/utils/zerotrie/src/lib.rs b/utils/zerotrie/src/lib.rs index 0f6e05f0814..c82977a0bc0 100644 --- a/utils/zerotrie/src/lib.rs +++ b/utils/zerotrie/src/lib.rs @@ -56,6 +56,7 @@ extern crate alloc; mod builder; mod byte_phf; +mod comparison; pub mod cursor; mod error; #[macro_use] @@ -73,6 +74,7 @@ pub use crate::zerotrie::ZeroTrieExtendedCapacity; pub use crate::zerotrie::ZeroTriePerfectHash; pub use crate::zerotrie::ZeroTrieSimpleAscii; pub use error::ZeroTrieBuildError; +pub use builder::ByteStr; #[cfg(feature = "alloc")] pub use crate::zerotrie::ZeroTrieStringIterator; diff --git a/utils/zerotrie/src/reader.rs b/utils/zerotrie/src/reader.rs index eed1c80aaad..400cca5ddbd 100644 --- a/utils/zerotrie/src/reader.rs +++ b/utils/zerotrie/src/reader.rs @@ -204,6 +204,7 @@ //! ``` use crate::byte_phf::PerfectByteHashMap; +use crate::comparison; use crate::cursor::AsciiProbeResult; use crate::helpers::*; use crate::options::*; @@ -367,14 +368,11 @@ pub(crate) fn get_parameterized( if matches!(T::OPTIONS.phf_mode, PhfMode::BinaryOnly) || x < 16 { // binary search (search, trie) = trie.debug_split_at(x); - let bsearch_result = - if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) { - search.binary_search_by_key(&c.to_ascii_lowercase(), |x| { - x.to_ascii_lowercase() - }) - } else { - search.binary_search(c) - }; + let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) { + search.binary_search_by(|p| comparison::cmpi(*p, *c)) + } else { + search.binary_search_by(|p| comparison::cmp(*p, *c)) + }; i = bsearch_result.ok()?; } else { // phf @@ -486,9 +484,9 @@ pub(crate) fn step_parameterized( // Always use binary search (search, *trie) = trie.debug_split_at(x); let bsearch_result = if matches!(T::OPTIONS.case_sensitivity, CaseSensitivity::IgnoreCase) { - search.binary_search_by_key(&c.to_ascii_lowercase(), |x| x.to_ascii_lowercase()) + search.binary_search_by(|p| comparison::cmpi(*p, c)) } else { - search.binary_search(&c) + search.binary_search_by(|p| comparison::cmp(*p, c)) }; match bsearch_result { Ok(i) => { diff --git a/utils/zerotrie/src/serde.rs b/utils/zerotrie/src/serde.rs index 48ae87193e8..d7bf2f54fe8 100644 --- a/utils/zerotrie/src/serde.rs +++ b/utils/zerotrie/src/serde.rs @@ -359,6 +359,7 @@ where #[cfg(test)] mod testdata { + use crate::ByteStr; include!("../tests/data/data.rs"); } diff --git a/utils/zerotrie/src/zerotrie.rs b/utils/zerotrie/src/zerotrie.rs index 21d6b430de2..b113de8ca65 100644 --- a/utils/zerotrie/src/zerotrie.rs +++ b/utils/zerotrie/src/zerotrie.rs @@ -47,11 +47,12 @@ use litemap::LiteMap; /// ``` /// use litemap::LiteMap; /// use zerotrie::ZeroTrie; +/// use zerotrie::ByteStr; /// -/// let mut map = LiteMap::<&[u8], usize>::new_vec(); -/// map.insert("foo".as_bytes(), 1); -/// map.insert("bar".as_bytes(), 2); -/// map.insert("bazzoo".as_bytes(), 3); +/// let mut map = LiteMap::<&ByteStr, usize>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bar".into(), 2); +/// map.insert("bazzoo".into(), 3); /// /// let trie = ZeroTrie::try_from(&map)?; /// @@ -82,12 +83,13 @@ pub(crate) enum ZeroTrieFlavor { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroTrieSimpleAscii; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"foo"[..], 1); -/// map.insert(b"bar", 2); -/// map.insert(b"bazzoo", 3); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bar".into(), 2); +/// map.insert("bazzoo".into(), 3); /// /// let trie = ZeroTrieSimpleAscii::try_from(&map)?; /// @@ -134,12 +136,13 @@ impl ZeroTrieSimpleAscii { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroAsciiIgnoreCaseTrie; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"foo"[..], 1); -/// map.insert(b"Bar", 2); -/// map.insert(b"Bazzoo", 3); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("Bar".into(), 2); +/// map.insert("Bazzoo".into(), 3); /// /// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?; /// @@ -156,14 +159,15 @@ impl ZeroTrieSimpleAscii { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroAsciiIgnoreCaseTrie; /// -/// let mut map = LiteMap::new_vec(); -/// map.insert(&b"bar"[..], 1); +/// let mut map = LiteMap::<&ByteStr, _>::new_vec(); +/// map.insert("bar".into(), 1); /// // OK: 'r' and 'Z' are different letters -/// map.insert(b"baZ", 2); +/// map.insert("baZ".into(), 2); /// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position -/// map.insert(b"baR", 2); +/// map.insert("baR".into(), 2); /// /// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!"); /// ``` @@ -187,12 +191,13 @@ pub struct ZeroAsciiIgnoreCaseTrie { /// /// ``` /// use litemap::LiteMap; +/// use zerotrie::ByteStr; /// use zerotrie::ZeroTriePerfectHash; /// -/// let mut map = LiteMap::<&[u8], usize>::new_vec(); -/// map.insert("foo".as_bytes(), 1); -/// map.insert("bår".as_bytes(), 2); -/// map.insert("båzzøø".as_bytes(), 3); +/// let mut map = LiteMap::<&ByteStr, usize>::new_vec(); +/// map.insert("foo".into(), 1); +/// map.insert("bår".into(), 2); +/// map.insert("båzzøø".into(), 3); /// /// let trie = ZeroTriePerfectHash::try_from(&map)?; /// @@ -483,17 +488,16 @@ macro_rules! impl_zerotrie_subtype { #[cfg(feature = "litemap")] impl<'a, K, S> TryFrom<&'a LiteMap> for $name> where - K: Borrow<[u8]>, + K: Borrow, S: litemap::store::StoreIterable<'a, K, usize>, { type Error = crate::error::ZeroTrieBuildError; fn try_from(map: &'a LiteMap) -> Result { - let tuples: Vec<(&[u8], usize)> = map + let byte_str_slice: Vec<(&ByteStr, usize)> = map .iter() .map(|(k, v)| (k.borrow(), *v)) .collect(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples); - Self::try_from_tuple_slice(byte_str_slice) + Self::try_from_tuple_slice(&byte_str_slice) } } #[cfg(feature = "litemap")] @@ -799,11 +803,10 @@ where fn from_iter>(iter: T) -> Self { // We need two Vecs because the first one anchors the `K`s that the second one borrows. let items = Vec::from_iter(iter); - let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect(); + let mut items: Vec<(&ByteStr, usize)> = items.iter().map(|(k, v)| (ByteStr::from_bytes(k.as_ref()), *v)).collect(); items.sort(); - let byte_str_slice = ByteStr::from_byte_slice_with_value(&items); #[allow(clippy::unwrap_used)] // FromIterator is panicky - Self::try_from_tuple_slice(byte_str_slice).unwrap() + Self::try_from_tuple_slice(&items).unwrap() } } diff --git a/utils/zerotrie/tests/asciitrie_test.rs b/utils/zerotrie/tests/asciitrie_test.rs index ae9dcbcf9e3..5e83bd42239 100644 --- a/utils/zerotrie/tests/asciitrie_test.rs +++ b/utils/zerotrie/tests/asciitrie_test.rs @@ -9,6 +9,7 @@ use zerotrie::ZeroTrieSimpleAscii; use zerovec::ZeroMap; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } diff --git a/utils/zerotrie/tests/builder_test.rs b/utils/zerotrie/tests/builder_test.rs index d92ad8520dc..e8e22d69dfb 100644 --- a/utils/zerotrie/tests/builder_test.rs +++ b/utils/zerotrie/tests/builder_test.rs @@ -5,8 +5,10 @@ use litemap::LiteMap; use zerotrie::ZeroTriePerfectHash; use zerotrie::ZeroTrieSimpleAscii; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -23,13 +25,13 @@ macro_rules! assert_bytes_eq { }; } -fn check_simple_ascii_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTrieSimpleAscii) +fn check_simple_ascii_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTrieSimpleAscii) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v)); + assert_eq!(trie.get(k.as_bytes()), Some(*v)); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -38,20 +40,20 @@ where // Check that the iterator returns items in the same order as the LiteMap assert!(items .iter() - .map(|(s, v)| (String::from_utf8(s.to_vec()).unwrap(), *v)) + .map(|(s, v)| (String::from_utf8(s.as_bytes().to_vec()).unwrap(), *v)) .eq(trie.iter())); // Check that the const builder works let const_trie = ZeroTrieSimpleAscii::try_from_litemap_with_const_builder(items).unwrap(); assert_eq!(trie.as_bytes(), const_trie.as_bytes()); } -fn check_phf_ascii_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash) +fn check_phf_ascii_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTriePerfectHash) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v)); + assert_eq!(trie.get(k.as_bytes()), Some(*v)); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -59,20 +61,20 @@ where } // Check that the iterator returns the contents of the LiteMap // Note: Since the items might not be in order, we collect them into a new LiteMap - let recovered_items: LiteMap<_, _> = trie.iter().collect(); + let recovered_items: LiteMap, usize> = trie.iter().map(|(k, v)| (ByteStr::from_boxed_bytes(k.into_boxed_slice()), v)).collect(); assert_eq!( - items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(), + items.to_borrowed_keys_values::>(), recovered_items.to_borrowed_keys_values() ); } -fn check_phf_bytes_trie(items: &LiteMap<&[u8], usize>, trie: &ZeroTriePerfectHash) +fn check_phf_bytes_trie(items: &LiteMap<&ByteStr, usize>, trie: &ZeroTriePerfectHash) where S: AsRef<[u8]> + ?Sized, { // Check that each item is in the trie for (k, v) in items.iter() { - assert_eq!(trie.get(k), Some(*v), "{k:?}"); + assert_eq!(trie.get(k.as_bytes()), Some(*v), "{k:?}"); } // Check that some items are not in the trie for s in NON_EXISTENT_STRINGS.iter() { @@ -80,19 +82,19 @@ where } // Check that the iterator returns the contents of the LiteMap // Note: Since the items might not be in order, we collect them into a new LiteMap - let recovered_items: LiteMap<_, _> = trie.iter().collect(); + let recovered_items: LiteMap, usize> = trie.iter().map(|(k, v)| (ByteStr::from_boxed_bytes(k.into_boxed_slice()), v)).collect(); assert_eq!( - items.to_borrowed_keys_values::<[u8], usize, Vec<_>>(), + items.to_borrowed_keys_values::>(), recovered_items.to_borrowed_keys_values() ); } #[test] fn test_basic() { - let lm1a: LiteMap<&[u8], usize> = testdata::basic::DATA_ASCII.iter().copied().collect(); - let lm1b: LiteMap<&[u8], usize> = lm1a.to_borrowed_keys(); - let lm2: LiteMap<&[u8], usize> = testdata::basic::DATA_UNICODE.iter().copied().collect(); - let lm3: LiteMap<&[u8], usize> = testdata::basic::DATA_BINARY.iter().copied().collect(); + let lm1a: LiteMap<&ByteStr, usize> = testdata::basic::DATA_ASCII.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); + let lm1b: LiteMap<&ByteStr, usize> = lm1a.to_borrowed_keys(); + let lm2: LiteMap<&ByteStr, usize> = testdata::basic::DATA_UNICODE.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); + let lm3: LiteMap<&ByteStr, usize> = testdata::basic::DATA_BINARY.iter().map(|(k, v)| (ByteStr::from_bytes(k), *v)).collect(); let expected_bytes = testdata::basic::TRIE_ASCII; let trie = ZeroTrieSimpleAscii::try_from(&lm1a).unwrap(); @@ -116,7 +118,7 @@ fn test_basic() { #[test] fn test_empty() { - let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&[u8], usize>::new_vec()).unwrap(); + let trie = ZeroTrieSimpleAscii::try_from(&LiteMap::<&ByteStr, usize>::new_vec()).unwrap(); assert_eq!(trie.byte_len(), 0); assert!(trie.is_empty()); assert_eq!(trie.get(b""), None); @@ -125,8 +127,8 @@ fn test_empty() { #[test] fn test_single_empty_value() { - let litemap: LiteMap<&[u8], usize> = [ - (&b""[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 10), // ] .into_iter() .collect(); @@ -136,16 +138,15 @@ fn test_single_empty_value() { let expected_bytes = &[0b10001010]; assert_eq!(trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(1, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_byte_string() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"x"[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("x"), 10), // ] .into_iter() .collect(); @@ -156,16 +157,15 @@ fn test_single_byte_string() { let expected_bytes = &[b'x', 0b10001010]; assert_bytes_eq!(2, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(2, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_string() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"xyz"[..], 10), // + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("xyz"), 10), // ] .into_iter() .collect(); @@ -178,15 +178,14 @@ fn test_single_string() { let expected_bytes = &[b'x', b'y', b'z', 0b10001010]; assert_bytes_eq!(4, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_prefix_strings() { - let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"xy", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("x"), 0), (ByteStr::from_str("xy"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"xyz"), None); @@ -194,15 +193,14 @@ fn test_prefix_strings() { let expected_bytes = &[b'x', 0b10000000, b'y', 0b10000001]; assert_bytes_eq!(4, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(4, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_single_byte_branch() { - let litemap: LiteMap<&[u8], usize> = [(&b"x"[..], 0), (b"y", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("x"), 0), (ByteStr::from_str("y"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"xy"), None); @@ -210,15 +208,14 @@ fn test_single_byte_branch() { let expected_bytes = &[0b11000010, b'x', b'y', 1, 0b10000000, 0b10000001]; assert_bytes_eq!(6, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(6, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_multi_byte_branch() { - let litemap: LiteMap<&[u8], usize> = [(&b"axb"[..], 0), (b"ayc", 1)].into_iter().collect(); + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("axb"), 0), (ByteStr::from_str("ayc"), 1)].into_iter().collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); assert_eq!(trie.get(b"a"), None); @@ -230,15 +227,14 @@ fn test_multi_byte_branch() { ]; assert_bytes_eq!(9, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(9, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_linear_varint_values() { - let litemap: LiteMap<&[u8], usize> = [(&b""[..], 100), (b"x", 500), (b"xyz", 5000)] + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str(""), 100), (ByteStr::from_str("x"), 500), (ByteStr::from_str("xyz"), 5000)] .into_iter() .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); @@ -249,15 +245,14 @@ fn test_linear_varint_values() { let expected_bytes = &[0x90, 0x54, b'x', 0x93, 0x64, b'y', b'z', 0x90, 0x96, 0x78]; assert_bytes_eq!(10, trie.as_bytes(), expected_bytes); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(10, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_bug() { - let litemap: LiteMap<&[u8], usize> = [(&b"abc"[..], 100), (b"abcd", 500), (b"abcde", 5000)] + let litemap: LiteMap<&ByteStr, usize> = [(ByteStr::from_str("abc"), 100), (ByteStr::from_str("abcd"), 500), (ByteStr::from_str("abcde"), 5000)] .into_iter() .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); @@ -266,16 +261,15 @@ fn test_bug() { assert_eq!(trie.get(b"abCD"), None); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_varint_branch() { let chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - let litemap: LiteMap<&[u8], usize> = (0..chars.len()) - .map(|i| (chars.get(i..i + 1).unwrap().as_bytes(), i)) + let litemap: LiteMap<&ByteStr, usize> = (0..chars.len()) + .map(|i| (ByteStr::from_str(chars.get(i..i + 1).unwrap()), i)) .collect(); let trie = ZeroTrieSimpleAscii::try_from(&litemap.as_sliced()).unwrap(); assert_eq!(trie.get(b""), None); @@ -287,29 +281,28 @@ fn test_varint_branch() { 0b11100000, // branch varint lead 0x14, // branch varint trail // search array: - b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', - b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', - b'U', b'V', b'W', b'X', b'Y', b'Z', - b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', - b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', - b'u', b'v', b'w', b'x', b'y', b'z', + b'A', b'a', b'B', b'b', b'C', b'c', b'D', b'd', b'E', b'e', + b'F', b'f', b'G', b'g', b'H', b'h', b'I', b'i', b'J', b'j', + b'K', b'k', b'L', b'l', b'M', b'm', b'N', b'n', b'O', b'o', + b'P', b'p', b'Q', b'q', b'R', b'r', b'S', b's', b'T', b't', + b'U', b'u', b'V', b'v', b'W', b'w', b'X', b'x', b'Y', b'y', + b'Z', b'z', // offset array: - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, - 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, - 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, - 86, - // single-byte values: - 0x80, (0x80 | 1), (0x80 | 2), (0x80 | 3), (0x80 | 4), - (0x80 | 5), (0x80 | 6), (0x80 | 7), (0x80 | 8), (0x80 | 9), - (0x80 | 10), (0x80 | 11), (0x80 | 12), (0x80 | 13), (0x80 | 14), - (0x80 | 15), - // multi-byte values: - 0x90, 0, 0x90, 1, 0x90, 2, 0x90, 3, 0x90, 4, 0x90, 5, - 0x90, 6, 0x90, 7, 0x90, 8, 0x90, 9, 0x90, 10, 0x90, 11, - 0x90, 12, 0x90, 13, 0x90, 14, 0x90, 15, 0x90, 16, 0x90, 17, - 0x90, 18, 0x90, 19, 0x90, 20, 0x90, 21, 0x90, 22, 0x90, 23, - 0x90, 24, 0x90, 25, 0x90, 26, 0x90, 27, 0x90, 28, 0x90, 29, - 0x90, 30, 0x90, 31, 0x90, 32, 0x90, 33, 0x90, 34, 0x90, 35, + 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, + 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46, + 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, + // values (mix of single-byte and multi-byte): + (0x80 | 0), 0x90, 10, (0x80 | 1), 0x90, 11, (0x80 | 2), 0x90, 12, + (0x80 | 3), 0x90, 13, (0x80 | 4), 0x90, 14, (0x80 | 5), 0x90, 15, + (0x80 | 6), 0x90, 16, (0x80 | 7), 0x90, 17, (0x80 | 8), 0x90, 18, + (0x80 | 9), 0x90, 19, (0x80 | 10), 0x90, 20, (0x80 | 11), 0x90, 21, + (0x80 | 12), 0x90, 22, (0x80 | 13), 0x90, 23, (0x80 | 14), 0x90, 24, + (0x80 | 15), 0x90, 25, + 0x90, 0, 0x90, 26, 0x90, 1, 0x90, 27, 0x90, 2, 0x90, 28, + 0x90, 3, 0x90, 29, 0x90, 4, 0x90, 30, 0x90, 5, 0x90, 31, + 0x90, 6, 0x90, 32, 0x90, 7, 0x90, 33, 0x90, 8, 0x90, 34, + 0x90, 9, 0x90, 35, ]; assert_bytes_eq!(193, trie.as_bytes(), expected_bytes); @@ -347,25 +340,24 @@ fn test_varint_branch() { 0x80 | 13, 0x80 | 14, 0x90, 16, 0x90, 10, 0x90, 11, 0x90, 12, 0x90, 29, 0x90, 13, 0x90, 15, 0x90, 14, ]; - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(246, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); } #[test] fn test_below_wide() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcd", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcd"), 10), ] .into_iter() .collect(); @@ -417,17 +409,17 @@ fn test_below_wide() { #[test] fn test_at_wide() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcde", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcde"), 10), ] .into_iter() .collect(); @@ -481,17 +473,17 @@ fn test_at_wide() { #[test] fn test_at_wide_plus() { - let litemap: LiteMap<&[u8], usize> = [ - (&b"abcdefghijklmnopqrstuvwxyz"[..], 1), - (b"bcdefghijklmnopqrstuvwxyza", 2), - (b"cdefghijklmnopqrstuvwxyzab", 3), - (b"defghijklmnopqrstuvwxyzabc", 4), - (b"efghijklmnopqrstuvwxyzabcd", 5), - (b"fghijklmnopqrstuvwxyzabcde", 6), - (b"ghijklmnopqrstuvwxyzabcdef", 7), - (b"hijklmnopqrstuvwxyzabcdefg", 8), - (b"ijklmnopqrstuvwxyzabcdefgh", 9), - (b"jklmnopqrstuvwxyzabcdef", 10), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str("abcdefghijklmnopqrstuvwxyz"), 1), + (ByteStr::from_str("bcdefghijklmnopqrstuvwxyza"), 2), + (ByteStr::from_str("cdefghijklmnopqrstuvwxyzab"), 3), + (ByteStr::from_str("defghijklmnopqrstuvwxyzabc"), 4), + (ByteStr::from_str("efghijklmnopqrstuvwxyzabcd"), 5), + (ByteStr::from_str("fghijklmnopqrstuvwxyzabcde"), 6), + (ByteStr::from_str("ghijklmnopqrstuvwxyzabcdef"), 7), + (ByteStr::from_str("hijklmnopqrstuvwxyzabcdefg"), 8), + (ByteStr::from_str("ijklmnopqrstuvwxyzabcdefgh"), 9), + (ByteStr::from_str("jklmnopqrstuvwxyzabcdef"), 10), ] .into_iter() .collect(); @@ -545,16 +537,16 @@ fn test_at_wide_plus() { #[test] fn test_everything() { - let litemap: LiteMap<&[u8], usize> = [ - (&b""[..], 0), - (b"axb", 100), - (b"ayc", 2), - (b"azd", 3), - (b"bxe", 4), - (b"bxefg", 500), - (b"bxefh", 6), - (b"bxei", 7), - (b"bxeikl", 8), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 0), + (ByteStr::from_str("axb"), 100), + (ByteStr::from_str("ayc"), 2), + (ByteStr::from_str("azd"), 3), + (ByteStr::from_str("bxe"), 4), + (ByteStr::from_str("bxefg"), 500), + (ByteStr::from_str("bxefh"), 6), + (ByteStr::from_str("bxei"), 7), + (ByteStr::from_str("bxeikl"), 8), ] .into_iter() .collect(); @@ -643,24 +635,23 @@ fn test_everything() { b'l', // 0b10001000, // value 8 ]; - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(36, trie_phf.as_bytes(), expected_bytes); check_phf_ascii_trie(&litemap, &trie_phf); - let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 73); - let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 63); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 146); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 136); } @@ -675,19 +666,19 @@ macro_rules! utf8_byte { #[test] fn test_non_ascii() { - let litemap: LiteMap<&[u8], usize> = [ - ("".as_bytes(), 0), - ("axb".as_bytes(), 100), - ("ayc".as_bytes(), 2), - ("azd".as_bytes(), 3), - ("bxe".as_bytes(), 4), - ("bxefg".as_bytes(), 500), - ("bxefh".as_bytes(), 6), - ("bxei".as_bytes(), 7), - ("bxeikl".as_bytes(), 8), - ("bxeiklmΚαλημέρααα".as_bytes(), 9), - ("bxeiklmαnλo".as_bytes(), 10), - ("bxeiklmη".as_bytes(), 11), + let litemap: LiteMap<&ByteStr, usize> = [ + (ByteStr::from_str(""), 0), + (ByteStr::from_str("axb"), 100), + (ByteStr::from_str("ayc"), 2), + (ByteStr::from_str("azd"), 3), + (ByteStr::from_str("bxe"), 4), + (ByteStr::from_str("bxefg"), 500), + (ByteStr::from_str("bxefh"), 6), + (ByteStr::from_str("bxei"), 7), + (ByteStr::from_str("bxeikl"), 8), + (ByteStr::from_str("bxeiklmΚαλημέρααα"), 9), + (ByteStr::from_str("bxeiklmαnλo"), 10), + (ByteStr::from_str("bxeiklmη"), 11), ] .into_iter() .collect(); @@ -734,11 +725,20 @@ fn test_non_ascii() { 0b10100001, // span of length 1 utf8_byte!('Κ', 0), // NOTE: all three letters have the same lead byte 0b11000011, // branch of 3 - utf8_byte!('Κ', 1), utf8_byte!('α', 1), utf8_byte!('η', 1), - 21, - 27, + utf8_byte!('Κ', 1), + 6, + 7, + // 21, + // 27, + b'n', + 0b10100010, // span of length 2 + utf8_byte!('λ', 0), + utf8_byte!('λ', 1), + b'o', + 0b10001010, // value 10 + 0b10001011, // value 11 0b10110000, // span of length 18 (lead) 0b00000010, // span of length 18 (trail) utf8_byte!('α', 0), @@ -760,13 +760,6 @@ fn test_non_ascii() { utf8_byte!('α', 0), utf8_byte!('α', 1), 0b10001001, // value 9 - b'n', - 0b10100010, // span of length 2 - utf8_byte!('λ', 0), - utf8_byte!('λ', 1), - b'o', - 0b10001010, // value 10 - 0b10001011, // value 11 ]; let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_bytes_eq!(73, trie_phf.as_bytes(), expected_bytes); @@ -776,15 +769,15 @@ fn test_non_ascii() { #[test] fn test_max_branch() { // Evaluate a branch with all 256 possible children - let mut litemap: LiteMap<&[u8], usize> = LiteMap::new_vec(); + let mut litemap: LiteMap<&ByteStr, usize> = LiteMap::new_vec(); let all_bytes: Vec = (u8::MIN..=u8::MAX).collect(); assert_eq!(all_bytes.len(), 256); let all_bytes_prefixed: Vec<[u8; 2]> = (u8::MIN..=u8::MAX).map(|x| [b'\0', x]).collect(); for b in all_bytes.iter() { - litemap.insert(core::slice::from_ref(b), *b as usize); + litemap.insert(ByteStr::from_bytes(core::slice::from_ref(b)), *b as usize); } for s in all_bytes_prefixed.iter() { - litemap.insert(s, s[1] as usize); + litemap.insert(ByteStr::from_bytes(s), s[1] as usize); } let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 3042); @@ -799,24 +792,23 @@ fn test_short_subtags_10pct() { assert_eq!(trie.byte_len(), 1050); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 1100); check_phf_ascii_trie(&litemap, &trie_phf); - let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 1329); - let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 1328); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 2835); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 2834); } @@ -829,24 +821,23 @@ fn test_short_subtags() { assert_eq!(trie.byte_len(), 8793); check_simple_ascii_trie(&litemap, &trie); - let litemap_bytes = litemap.to_borrowed_keys::<[u8], Vec<_>>(); - let trie_phf = ZeroTriePerfectHash::try_from(&litemap_bytes).unwrap(); + let trie_phf = ZeroTriePerfectHash::try_from(&litemap).unwrap(); assert_eq!(trie_phf.byte_len(), 9400); check_phf_ascii_trie(&litemap, &trie_phf); - let zm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zm: zerovec::ZeroMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zm).unwrap(); assert_eq!(zhm_buf.len(), 15180); - let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zm: zerovec::ZeroMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zm).unwrap(); assert_eq!(zhm_buf.len(), 13302); - let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (*a, b)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], usize> = litemap.iter().map(|(a, b)| (a.as_bytes(), b)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 30198); - let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (*a, *b as u8)).collect(); + let zhm: zerovec::ZeroHashMap<[u8], u8> = litemap.iter().map(|(a, b)| (a.as_bytes(), *b as u8)).collect(); let zhm_buf = postcard::to_allocvec(&zhm).unwrap(); assert_eq!(zhm_buf.len(), 28320); } diff --git a/utils/zerotrie/tests/data/data.rs b/utils/zerotrie/tests/data/data.rs index 6dd483b7bad..0d7a63640d5 100644 --- a/utils/zerotrie/tests/data/data.rs +++ b/utils/zerotrie/tests/data/data.rs @@ -19,13 +19,12 @@ const fn single_byte_branch_equal(x: u8) -> u8 { use single_byte_branch_equal as single_byte_short_match; #[allow(dead_code)] -pub fn strings_to_litemap<'a>(strings: &[&'a str]) -> LiteMap<&'a [u8], usize> { +pub fn strings_to_litemap<'a>(strings: &[&'a str]) -> LiteMap<&'a ByteStr, usize> { strings .iter() .copied() - .map(|x| x.as_bytes()) .enumerate() - .map(|(i, s)| (s, i)) + .map(|(i, s)| (ByteStr::from_str(s), i)) .collect() } @@ -478,8 +477,8 @@ pub mod short_subtags { "fai", "fan", "ff", - "ff-Adlm", "ffi", + "ff-Adlm", "ffm", "fi", "fia", @@ -582,10 +581,10 @@ pub mod short_subtags { "gwt", "gyi", "ha", - "ha-CM", - "ha-SD", "hag", "hak", + "ha-CM", + "ha-SD", "ham", "haw", "haz", @@ -594,12 +593,12 @@ pub mod short_subtags { "he", "hhy", "hi", - "hi-Latn", "hia", "hif", "hig", "hih", "hil", + "hi-Latn", "hla", "hlu", "hmd", @@ -728,13 +727,13 @@ pub mod short_subtags { "kjs", "kjy", "kk", + "kkc", + "kkj", "kk-AF", "kk-Arab", "kk-CN", "kk-IR", "kk-MN", - "kkc", - "kkj", "kl", "kln", "klq", @@ -784,13 +783,13 @@ pub mod short_subtags { "kto", "ktr", "ku", - "ku-Arab", - "ku-LB", - "ku-Yezi", "kub", "kud", "kue", "kuj", + "ku-Arab", + "ku-LB", + "ku-Yezi", "kum", "kun", "kup", @@ -813,11 +812,11 @@ pub mod short_subtags { "kxw", "kxz", "ky", + "kye", "ky-Arab", "ky-CN", "ky-Latn", "ky-TR", - "kye", "kyx", "kzh", "kzj", @@ -953,11 +952,11 @@ pub mod short_subtags { "mmu", "mmx", "mn", - "mn-CN", - "mn-Mong", "mna", "mnf", "mni", + "mn-CN", + "mn-Mong", "mnw", "mo", "moa", @@ -1106,11 +1105,11 @@ pub mod short_subtags { "oui", "ozm", "pa", - "pa-Arab", - "pa-PK", "pag", "pal", "pal-Phlp", + "pa-Arab", + "pa-PK", "pam", "pap", "pau", @@ -1203,12 +1202,12 @@ pub mod short_subtags { "scn", "sco", "sd", + "sdc", + "sdh", "sd-Deva", "sd-IN", "sd-Khoj", "sd-Sind", - "sdc", - "sdh", "se", "sef", "seh", @@ -1262,11 +1261,11 @@ pub mod short_subtags { "sps", "sq", "sr", + "srb", "sr-ME", "sr-RO", "sr-RU", "sr-TR", - "srb", "srn", "srr", "srx", @@ -1320,9 +1319,9 @@ pub mod short_subtags { "tet", "tfi", "tg", + "tgc", "tg-Arab", "tg-PK", - "tgc", "tgo", "tgu", "th", @@ -1392,44 +1391,14 @@ pub mod short_subtags { "udi", "udm", "ug", + "uga", "ug-Cyrl", "ug-KZ", "ug-MN", - "uga", "uk", "uli", "umb", "und", - "und-002", - "und-003", - "und-005", - "und-009", - "und-011", - "und-013", - "und-014", - "und-015", - "und-017", - "und-018", - "und-019", - "und-021", - "und-029", - "und-030", - "und-034", - "und-035", - "und-039", - "und-053", - "und-054", - "und-057", - "und-061", - "und-142", - "und-143", - "und-145", - "und-150", - "und-151", - "und-154", - "und-155", - "und-202", - "und-419", "und-AD", "und-Adlm", "und-AE", @@ -1731,6 +1700,27 @@ pub mod short_subtags { "und-Osge", "und-Osma", "und-Ougr", + "und-002", + "und-003", + "und-005", + "und-009", + "und-011", + "und-013", + "und-014", + "und-015", + "und-017", + "und-018", + "und-019", + "und-021", + "und-029", + "und-030", + "und-034", + "und-035", + "und-039", + "und-053", + "und-054", + "und-057", + "und-061", "und-PA", "und-Palm", "und-Pauc", @@ -1753,8 +1743,16 @@ pub mod short_subtags { "und-PT", "und-PW", "und-PY", + "und-142", + "und-143", + "und-145", + "und-150", + "und-151", + "und-154", + "und-155", "und-QA", "und-QO", + "und-202", "und-RE", "und-Rjng", "und-RO", @@ -1793,6 +1791,7 @@ pub mod short_subtags { "und-SY", "und-Sylo", "und-Syrc", + "und-419", "und-Tagb", "und-Takr", "und-Tale", diff --git a/utils/zerotrie/tests/ignorecase_test.rs b/utils/zerotrie/tests/ignorecase_test.rs index fb73ef7cc33..ff9c38caa0c 100644 --- a/utils/zerotrie/tests/ignorecase_test.rs +++ b/utils/zerotrie/tests/ignorecase_test.rs @@ -3,8 +3,10 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use zerotrie::ZeroAsciiIgnoreCaseTrie; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -18,18 +20,20 @@ fn test_ignore_case_coverage() { ZeroAsciiIgnoreCaseTrie::try_from(&litemap).unwrap(); let trie = litemap .iter() - .map(|(k, v)| (*k, *v)) + .map(|(k, v)| (k.as_bytes(), *v)) .collect::>>(); // Test lookup for (k, v) in litemap.iter() { - assert_eq!(trie.get(k), Some(*v), "normal: {k:?}"); + assert_eq!(trie.get(k.as_bytes()), Some(*v), "normal: {k:?}"); let k_upper = k + .as_bytes() .iter() .map(|c| c.to_ascii_uppercase()) .collect::>(); assert_eq!(trie.get(k_upper), Some(*v), "upper: {k:?}"); let k_lower = k + .as_bytes() .iter() .map(|c| c.to_ascii_lowercase()) .collect::>(); @@ -40,7 +44,7 @@ fn test_ignore_case_coverage() { let problematic_strs = &["A", "ab", "abc", "aBcd", "aBcgHi"]; for problematic_str in problematic_strs { let mut litemap = litemap.clone(); - litemap.insert(problematic_str.as_bytes(), 100); + litemap.insert(ByteStr::from_str(problematic_str), 100); ZeroAsciiIgnoreCaseTrie::try_from(&litemap).expect_err(problematic_str); } } diff --git a/utils/zerotrie/tests/locale_aux_test.rs b/utils/zerotrie/tests/locale_aux_test.rs index eea27af7842..177d39b19c3 100644 --- a/utils/zerotrie/tests/locale_aux_test.rs +++ b/utils/zerotrie/tests/locale_aux_test.rs @@ -10,8 +10,10 @@ use writeable::Writeable; use zerotrie::ZeroTriePerfectHash; use zerotrie::ZeroTrieSimpleAscii; use zerovec::VarZeroVec; +use zerotrie::ByteStr; mod testdata { + use zerotrie::ByteStr; include!("data/data.rs"); } @@ -91,7 +93,7 @@ fn test_aux_split() { let mut total_simpleascii_len = 0; let mut total_perfecthash_len = 0; let mut total_vzv_len = 0; - let mut unique_locales = BTreeSet::new(); + let mut unique_locales = BTreeSet::>::new(); for private in aux_keys.iter() { let current_locales: Vec = locales .iter() @@ -102,10 +104,10 @@ fn test_aux_split() { l }) .collect(); - let litemap: LiteMap, usize> = current_locales + let litemap: LiteMap, usize> = current_locales .iter() .map(|l| { - (l.write_to_string().into_owned().into_bytes(), { + (ByteStr::from_boxed_bytes(l.write_to_string().into_owned().into_bytes().into_boxed_slice()), { cumulative_index += 1; cumulative_index - 1 }) @@ -118,8 +120,8 @@ fn test_aux_split() { let trie = ZeroTriePerfectHash::try_from(&litemap).unwrap(); total_perfecthash_len += trie.byte_len(); - for k in litemap.iter_keys() { - unique_locales.insert(k.clone()); + for (k, _) in litemap.into_iter() { + unique_locales.insert(k); } let strs: Vec = current_locales