Revise strings interning.

This commit is contained in:
Stephen Chung
2022-08-12 16:34:57 +08:00
parent 5ba9b3bd1c
commit cba394d73c
15 changed files with 272 additions and 123 deletions

View File

@@ -82,7 +82,7 @@ impl Borrow<SmartString> for ImmutableString {
impl Borrow<str> for ImmutableString {
#[inline(always)]
fn borrow(&self) -> &str {
self.0.as_str()
self.as_str()
}
}
@@ -187,14 +187,14 @@ impl FromIterator<SmartString> for ImmutableString {
impl fmt::Display for ImmutableString {
#[inline(always)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(self.0.as_str(), f)
fmt::Display::fmt(self.as_str(), f)
}
}
impl fmt::Debug for ImmutableString {
#[inline(always)]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(self.0.as_str(), f)
fmt::Debug::fmt(self.as_str(), f)
}
}
@@ -208,7 +208,7 @@ impl Add for ImmutableString {
} else if self.is_empty() {
rhs
} else {
self.make_mut().push_str(rhs.0.as_str());
self.make_mut().push_str(rhs.as_str());
self
}
}
@@ -225,7 +225,40 @@ impl Add for &ImmutableString {
rhs.clone()
} else {
let mut s = self.clone();
s.make_mut().push_str(rhs.0.as_str());
s.make_mut().push_str(rhs.as_str());
s
}
}
}
impl Add<&Self> for ImmutableString {
    type Output = Self;

    /// Concatenate an owned [`ImmutableString`] with a borrowed one.
    ///
    /// * If `rhs` is empty, `self` is returned as-is.
    /// * If `self` is empty, a clone of `rhs` is returned.
    /// * Otherwise the text of `rhs` is appended to `self` via `make_mut`.
    #[inline]
    fn add(mut self, rhs: &Self) -> Self::Output {
        match (self.is_empty(), rhs.is_empty()) {
            // Nothing to append.
            (_, true) => self,
            // Nothing to append to: hand back a clone of the right-hand side.
            (true, false) => rhs.clone(),
            // Both sides carry text: append in place.
            (false, false) => {
                self.make_mut().push_str(rhs.as_str());
                self
            }
        }
    }
}
impl Add<ImmutableString> for &ImmutableString {
type Output = ImmutableString;
#[inline]
fn add(self, rhs: ImmutableString) -> Self::Output {
if rhs.is_empty() {
self.clone()
} else if self.is_empty() {
rhs
} else {
let mut s = self.clone();
s.make_mut().push_str(rhs.as_str());
s
}
}
@@ -238,7 +271,7 @@ impl AddAssign<&ImmutableString> for ImmutableString {
if self.is_empty() {
self.0 = rhs.0.clone();
} else {
self.make_mut().push_str(rhs.0.as_str());
self.make_mut().push_str(rhs.as_str());
}
}
}
@@ -251,7 +284,7 @@ impl AddAssign<ImmutableString> for ImmutableString {
if self.is_empty() {
self.0 = rhs.0;
} else {
self.make_mut().push_str(rhs.0.as_str());
self.make_mut().push_str(rhs.as_str());
}
}
}
@@ -580,6 +613,10 @@ impl ImmutableString {
pub fn new() -> Self {
Self(SmartString::new_const().into())
}
/// Strong count of references to the underlying string.
///
/// This is the value reported by `Shared::strong_count` for the inner shared
/// pointer; a result greater than one means the string data is referenced
/// from more than one place.
#[inline(always)]
#[must_use]
pub(crate) fn strong_count(&self) -> usize {
    Shared::strong_count(&self.0)
}
/// Consume the [`ImmutableString`] and convert it into a [`String`].
///
/// If there are other references to the same string, a cloned copy is returned.

View File

@@ -1,5 +1,5 @@
use crate::func::hashing::get_hasher;
use crate::{Identifier, ImmutableString};
use crate::ImmutableString;
#[cfg(feature = "no_std")]
use std::prelude::v1::*;
@@ -10,12 +10,20 @@ use std::{
ops::AddAssign,
};
/// Maximum number of strings interned.
pub const MAX_INTERNED_STRINGS: usize = 256;
/// Maximum length of strings interned.
pub const MAX_STRING_LEN: usize = 24;
/// _(internals)_ A factory of identifiers from text strings.
/// Exported under the `internals` feature only.
///
/// Normal identifiers, property getters and setters are interned separately.
#[derive(Debug, Clone, Default, Hash)]
#[derive(Debug, Clone, Hash)]
pub struct StringsInterner<'a> {
/// Maximum capacity.
max: usize,
/// Normal strings.
strings: BTreeMap<u64, ImmutableString>,
/// Property getters.
@@ -28,12 +36,27 @@ pub struct StringsInterner<'a> {
dummy: PhantomData<&'a ()>,
}
/// The default [`StringsInterner`] is the one produced by [`StringsInterner::new`].
impl Default for StringsInterner<'_> {
#[inline(always)]
fn default() -> Self {
// Delegate to `new` so that both construction paths yield the same value.
Self::new()
}
}
impl StringsInterner<'_> {
/// Create a new [`StringsInterner`].
#[inline]
#[inline(always)]
#[must_use]
pub fn new() -> Self {
Self::new_with_capacity(MAX_INTERNED_STRINGS)
}
/// Create a new [`StringsInterner`] with a maximum capacity.
#[inline]
#[must_use]
pub fn new_with_capacity(capacity: usize) -> Self {
Self {
max: capacity,
strings: BTreeMap::new(),
#[cfg(not(feature = "no_object"))]
getters: BTreeMap::new(),
@@ -46,7 +69,7 @@ impl StringsInterner<'_> {
/// Get an identifier from a text string (with no prefix), adding it to the interner if necessary.
#[inline(always)]
#[must_use]
pub fn get(&mut self, text: impl AsRef<str>) -> ImmutableString {
pub fn get(&mut self, text: impl AsRef<str> + Into<ImmutableString>) -> ImmutableString {
self.get_with_prefix("", text)
}
@@ -65,40 +88,81 @@ impl StringsInterner<'_> {
/// Panics if the prefix is not recognized.
#[inline]
#[must_use]
pub fn get_with_prefix(
pub fn get_with_prefix<T: AsRef<str> + Into<ImmutableString>>(
&mut self,
prefix: impl AsRef<str>,
text: impl AsRef<str>,
text: T,
) -> ImmutableString {
let prefix = prefix.as_ref();
let text = text.as_ref();
let key = text.as_ref();
let (dict, mapper): (_, fn(&str) -> Identifier) = match prefix {
// Do not intern numbers
if prefix == "" && key.bytes().all(|c| c == b'.' || (c >= b'0' && c <= b'9')) {
return text.into();
}
let (dict, mapper): (_, fn(T) -> ImmutableString) = match prefix {
"" => (&mut self.strings, |s| s.into()),
#[cfg(not(feature = "no_object"))]
crate::engine::FN_GET => (&mut self.getters, crate::engine::make_getter),
crate::engine::FN_GET => (&mut self.getters, |s| {
crate::engine::make_getter(s.as_ref()).into()
}),
#[cfg(not(feature = "no_object"))]
crate::engine::FN_SET => (&mut self.setters, crate::engine::make_setter),
crate::engine::FN_SET => (&mut self.setters, |s| {
crate::engine::make_setter(s.as_ref()).into()
}),
_ => unreachable!("unsupported prefix {}", prefix),
};
if key.len() > MAX_STRING_LEN {
return mapper(text);
}
let hasher = &mut get_hasher();
text.hash(hasher);
key.hash(hasher);
let key = hasher.finish();
if !dict.is_empty() && dict.contains_key(&key) {
dict.get(&key).unwrap().clone()
} else {
let value: ImmutableString = mapper(text).into();
dict.insert(key, value.clone());
value
return dict.get(&key).unwrap().clone();
}
let value = mapper(text);
if value.strong_count() > 1 {
return value;
}
dict.insert(key, value.clone());
println!("Interning '{value}'");
// If the interner is over capacity, remove the longest entry
if self.strings.len() > self.max {
// Leave some buffer to grow when shrinking the cache.
// We leave at least two entries, one for the empty string, and one for the string
// that has just been inserted.
let max = if self.max < 5 { 2 } else { self.max - 3 };
while self.strings.len() > max {
let (_, n) = self.strings.iter().fold((0, 0), |(x, n), (&k, v)| {
if k != key && v.len() > x {
(v.len(), k)
} else {
(x, n)
}
});
self.strings.remove(&n);
}
}
value
}
/// Number of strings interned.
#[inline(always)]
#[inline]
#[must_use]
pub fn len(&self) -> usize {
#[cfg(not(feature = "no_object"))]
@@ -109,7 +173,7 @@ impl StringsInterner<'_> {
}
/// Returns `true` if there are no interned strings.
#[inline(always)]
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
#[cfg(not(feature = "no_object"))]
@@ -118,16 +182,30 @@ impl StringsInterner<'_> {
#[cfg(feature = "no_object")]
return self.strings.is_empty();
}
/// Remove every interned string from this interner.
///
/// The normal-strings map is always emptied; the property getter and setter
/// maps are emptied as well unless the `no_object` feature is enabled.
#[inline]
pub fn clear(&mut self) {
    #[cfg(not(feature = "no_object"))]
    self.getters.clear();
    #[cfg(not(feature = "no_object"))]
    self.setters.clear();

    self.strings.clear();
}
}
impl AddAssign<Self> for StringsInterner<'_> {
#[inline(always)]
fn add_assign(&mut self, rhs: Self) {
self.strings.extend(rhs.strings.into_iter());
#[cfg(not(feature = "no_object"))]
self.getters.extend(rhs.getters.into_iter());
#[cfg(not(feature = "no_object"))]
self.setters.extend(rhs.setters.into_iter());
{
self.getters.extend(rhs.getters.into_iter());
self.setters.extend(rhs.setters.into_iter());
}
}
}
@@ -135,12 +213,14 @@ impl AddAssign<&Self> for StringsInterner<'_> {
#[inline(always)]
fn add_assign(&mut self, rhs: &Self) {
self.strings
.extend(rhs.strings.iter().map(|(k, v)| (k.clone(), v.clone())));
.extend(rhs.strings.iter().map(|(&k, v)| (k, v.clone())));
#[cfg(not(feature = "no_object"))]
self.getters
.extend(rhs.getters.iter().map(|(k, v)| (k.clone(), v.clone())));
#[cfg(not(feature = "no_object"))]
self.setters
.extend(rhs.setters.iter().map(|(k, v)| (k.clone(), v.clone())));
{
self.getters
.extend(rhs.getters.iter().map(|(&k, v)| (k, v.clone())));
self.setters
.extend(rhs.setters.iter().map(|(&k, v)| (k, v.clone())));
}
}
}