Revise strings interning.

This commit is contained in:
Stephen Chung
2022-08-12 16:34:57 +08:00
parent 5ba9b3bd1c
commit cba394d73c
15 changed files with 272 additions and 123 deletions

View File

@@ -1,5 +1,5 @@
use crate::func::hashing::get_hasher;
use crate::{Identifier, ImmutableString};
use crate::ImmutableString;
#[cfg(feature = "no_std")]
use std::prelude::v1::*;
@@ -10,12 +10,20 @@ use std::{
ops::AddAssign,
};
/// Maximum number of strings interned.
///
/// This is the capacity passed to `new_with_capacity` when an interner is created via `new`.
pub const MAX_INTERNED_STRINGS: usize = 256;
/// Maximum length of strings interned.
///
/// The limit is compared against `str::len`, so it is measured in bytes; longer text is
/// returned to the caller without being added to the interner.
pub const MAX_STRING_LEN: usize = 24;
/// _(internals)_ A factory of identifiers from text strings.
/// Exported under the `internals` feature only.
///
/// Normal identifiers, property getters and setters are interned separately.
#[derive(Debug, Clone, Default, Hash)]
#[derive(Debug, Clone, Hash)]
pub struct StringsInterner<'a> {
/// Maximum capacity.
max: usize,
/// Normal strings.
strings: BTreeMap<u64, ImmutableString>,
/// Property getters.
@@ -28,12 +36,27 @@ pub struct StringsInterner<'a> {
dummy: PhantomData<&'a ()>,
}
impl Default for StringsInterner<'_> {
#[inline(always)]
fn default() -> Self {
Self::new()
}
}
impl StringsInterner<'_> {
/// Create a new [`StringsInterner`].
#[inline]
#[inline(always)]
#[must_use]
pub fn new() -> Self {
Self::new_with_capacity(MAX_INTERNED_STRINGS)
}
/// Create a new [`StringsInterner`] with a maximum capacity.
#[inline]
#[must_use]
pub fn new_with_capacity(capacity: usize) -> Self {
Self {
max: capacity,
strings: BTreeMap::new(),
#[cfg(not(feature = "no_object"))]
getters: BTreeMap::new(),
@@ -46,7 +69,7 @@ impl StringsInterner<'_> {
/// Get an identifier from a text string and prefix, adding it to the interner if necessary.
#[inline(always)]
#[must_use]
pub fn get(&mut self, text: impl AsRef<str>) -> ImmutableString {
pub fn get(&mut self, text: impl AsRef<str> + Into<ImmutableString>) -> ImmutableString {
self.get_with_prefix("", text)
}
@@ -65,40 +88,81 @@ impl StringsInterner<'_> {
/// Panics if the prefix is not recognized.
#[inline]
#[must_use]
pub fn get_with_prefix(
pub fn get_with_prefix<T: AsRef<str> + Into<ImmutableString>>(
&mut self,
prefix: impl AsRef<str>,
text: impl AsRef<str>,
text: T,
) -> ImmutableString {
let prefix = prefix.as_ref();
let text = text.as_ref();
let key = text.as_ref();
let (dict, mapper): (_, fn(&str) -> Identifier) = match prefix {
// Do not intern numbers
if prefix == "" && key.bytes().all(|c| c == b'.' || (c >= b'0' && c <= b'9')) {
return text.into();
}
let (dict, mapper): (_, fn(T) -> ImmutableString) = match prefix {
"" => (&mut self.strings, |s| s.into()),
#[cfg(not(feature = "no_object"))]
crate::engine::FN_GET => (&mut self.getters, crate::engine::make_getter),
crate::engine::FN_GET => (&mut self.getters, |s| {
crate::engine::make_getter(s.as_ref()).into()
}),
#[cfg(not(feature = "no_object"))]
crate::engine::FN_SET => (&mut self.setters, crate::engine::make_setter),
crate::engine::FN_SET => (&mut self.setters, |s| {
crate::engine::make_setter(s.as_ref()).into()
}),
_ => unreachable!("unsupported prefix {}", prefix),
};
if key.len() > MAX_STRING_LEN {
return mapper(text);
}
let hasher = &mut get_hasher();
text.hash(hasher);
key.hash(hasher);
let key = hasher.finish();
if !dict.is_empty() && dict.contains_key(&key) {
dict.get(&key).unwrap().clone()
} else {
let value: ImmutableString = mapper(text).into();
dict.insert(key, value.clone());
value
return dict.get(&key).unwrap().clone();
}
let value = mapper(text);
if value.strong_count() > 1 {
return value;
}
dict.insert(key, value.clone());
println!("Interning '{value}'");
// If the interner is over capacity, remove the longest entry
if self.strings.len() > self.max {
// Leave some buffer to grow when shrinking the cache.
// We leave at least two entries, one for the empty string, and one for the string
// that has just been inserted.
let max = if self.max < 5 { 2 } else { self.max - 3 };
while self.strings.len() > max {
let (_, n) = self.strings.iter().fold((0, 0), |(x, n), (&k, v)| {
if k != key && v.len() > x {
(v.len(), k)
} else {
(x, n)
}
});
self.strings.remove(&n);
}
}
value
}
/// Number of strings interned.
#[inline(always)]
#[inline]
#[must_use]
pub fn len(&self) -> usize {
#[cfg(not(feature = "no_object"))]
@@ -109,7 +173,7 @@ impl StringsInterner<'_> {
}
/// Are there no interned strings?
#[inline(always)]
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
#[cfg(not(feature = "no_object"))]
@@ -118,16 +182,30 @@ impl StringsInterner<'_> {
#[cfg(feature = "no_object")]
return self.strings.is_empty();
}
/// Remove every interned entry from the interner's dictionaries.
#[inline]
pub fn clear(&mut self) {
    // Property getters and setters only exist when object support is compiled in.
    #[cfg(not(feature = "no_object"))]
    self.getters.clear();
    #[cfg(not(feature = "no_object"))]
    self.setters.clear();

    self.strings.clear();
}
}
impl AddAssign<Self> for StringsInterner<'_> {
    /// Merge every entry of `rhs` into `self`, consuming `rhs`.
    ///
    /// An entry of `rhs` replaces an entry of `self` stored under the same hash key.
    /// The maximum capacity is not re-checked here.
    #[inline(always)]
    fn add_assign(&mut self, rhs: Self) {
        self.strings.extend(rhs.strings);

        // Each map is moved out of `rhs` exactly once.
        #[cfg(not(feature = "no_object"))]
        {
            self.getters.extend(rhs.getters);
            self.setters.extend(rhs.setters);
        }
    }
}
@@ -135,12 +213,14 @@ impl AddAssign<&Self> for StringsInterner<'_> {
/// Merge a copy of every entry of `rhs` into `self`, leaving `rhs` untouched.
///
/// An entry of `rhs` replaces an entry of `self` stored under the same hash key.
/// The maximum capacity is not re-checked here.
#[inline(always)]
fn add_assign(&mut self, rhs: &Self) {
    // Keys are plain `u64` hashes and are copied; only the values need cloning.
    self.strings
        .extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));

    #[cfg(not(feature = "no_object"))]
    {
        self.getters
            .extend(rhs.getters.iter().map(|(&k, v)| (k, v.clone())));
        self.setters
            .extend(rhs.setters.iter().map(|(&k, v)| (k, v.clone())));
    }
}
}