Fine-tune table-driven tokenizing.

This commit is contained in:
Stephen Chung
2023-03-15 17:22:11 +08:00
parent 2aa7b99d1e
commit 41636eac55
11 changed files with 351 additions and 159 deletions

View File

@@ -1,9 +1,6 @@
//! Main module defining the lexer and parser.
use crate::engine::{
Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL,
KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_TYPE_OF,
};
use crate::engine::Precedence;
use crate::func::native::OnParseTokenCallback;
use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
use smallvec::SmallVec;
@@ -308,7 +305,9 @@ impl fmt::Display for Token {
}
}
// Table-driven keyword recognizer generated by GNU gperf.
// Table-driven keyword recognizer generated by GNU gperf on the file `tools/keywords.txt`.
//
// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.
const MIN_KEYWORD_LEN: usize = 1;
const MAX_KEYWORD_LEN: usize = 8;
@@ -508,7 +507,9 @@ static KEYWORDS_LIST: [(&str, Token); 153] = [
("#{", Token::MapStart),
];
// Table-driven reserved symbol recognizer generated by GNU gperf.
// Table-driven reserved symbol recognizer generated by GNU gperf on the file `tools/reserved.txt`.
//
// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.
const MIN_RESERVED_LEN: usize = 1;
const MAX_RESERVED_LEN: usize = 10;
@@ -530,120 +531,120 @@ static RESERVED_ASSOC_VALUES: [u8; 256] = [
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
];
static RESERVED_LIST: [(&str, bool); 113] = [
("", false),
("~", true),
("is", true),
("...", true),
("", false),
("print", true),
("@", true),
("private", cfg!(feature = "no_function")),
("", false),
("this", true),
("", false),
("thread", true),
("as", cfg!(feature = "no_module")),
("", false),
("", false),
("spawn", true),
("static", true),
(":=", true),
("===", true),
("case", true),
("super", true),
("shared", true),
("package", true),
("use", true),
("with", true),
("curry", true),
("$", true),
("type_of", true),
("nil", true),
("sync", true),
("yield", true),
("import", cfg!(feature = "no_module")),
("--", true),
("new", true),
("exit", true),
("async", true),
("export", cfg!(feature = "no_module")),
("!.", true),
("", false),
("call", true),
("match", true),
("", false),
("fn", cfg!(feature = "no_function")),
("var", true),
("null", true),
("await", true),
("#", true),
("default", true),
("!==", true),
("eval", true),
("debug", true),
("?", true),
("?.", cfg!(feature = "no_object")),
("", false),
("protected", true),
("", false),
("", false),
("go", true),
("", false),
("goto", true),
("", false),
("public", true),
("<-", true),
("", false),
("is_def_fn", cfg!(not(feature = "no_function"))),
("is_def_var", true),
("", false),
("<|", true),
("::<", true),
("", false),
("", false),
("", false),
("->", true),
("", false),
("", false),
("", false),
("module", true),
("|>", true),
("", false),
("void", true),
("", false),
("", false),
("#!", true),
("", false),
("", false),
("", false),
("", false),
("?[", cfg!(feature = "no_index")),
("", false),
("", false),
("", false),
("", false),
("Fn", true),
("", false),
("", false),
("", false),
("", false),
(":;", true),
("", false),
("", false),
("", false),
("", false),
("++", true),
("", false),
("", false),
("", false),
("", false),
("*)", true),
("", false),
("", false),
("", false),
("", false),
("(*", true),
// Lookup table of reserved keywords and symbols.
//
// `is_reserved_keyword_or_symbol` indexes this table directly with the hash value it
// computes for a piece of syntax, so the position of each entry must match its hash.
//
// Each entry is a tuple `(text, is_reserved, call_as_function, call_as_method)`:
//
// * `text` - the keyword or symbol stored in this slot; `""` marks an unused slot.
// * `is_reserved` - whether the text is currently treated as reserved; for some entries
//   this depends on crate features via `cfg!`.
// * `call_as_function` - reported by `is_reserved_keyword_or_symbol` as "can be called
//   normally as a function".
// * `call_as_method` - reported by `is_reserved_keyword_or_symbol` as "can be called in
//   method-call style".
static RESERVED_LIST: [(&str, bool, bool, bool); 113] = [
("", false, false, false),
("~", true, false, false),
("is", true, false, false),
("...", true, false, false),
("", false, false, false),
("print", true, true, false),
("@", true, false, false),
("private", cfg!(feature = "no_function"), false, false),
("", false, false, false),
("this", true, false, false),
("", false, false, false),
("thread", true, false, false),
("as", cfg!(feature = "no_module"), false, false),
("", false, false, false),
("", false, false, false),
("spawn", true, false, false),
("static", true, false, false),
(":=", true, false, false),
("===", true, false, false),
("case", true, false, false),
("super", true, false, false),
("shared", true, false, false),
("package", true, false, false),
("use", true, false, false),
("with", true, false, false),
("curry", true, true, true),
("$", true, false, false),
("type_of", true, true, true),
("nil", true, false, false),
("sync", true, false, false),
("yield", true, false, false),
("import", cfg!(feature = "no_module"), false, false),
("--", true, false, false),
("new", true, false, false),
("exit", true, false, false),
("async", true, false, false),
("export", cfg!(feature = "no_module"), false, false),
("!.", true, false, false),
("", false, false, false),
("call", true, true, true),
("match", true, false, false),
("", false, false, false),
("fn", cfg!(feature = "no_function"), false, false),
("var", true, false, false),
("null", true, false, false),
("await", true, false, false),
("#", true, false, false),
("default", true, false, false),
("!==", true, false, false),
("eval", true, true, false),
("debug", true, true, false),
("?", true, false, false),
("?.", cfg!(feature = "no_object"), false, false),
("", false, false, false),
("protected", true, false, false),
("", false, false, false),
("", false, false, false),
("go", true, false, false),
("", false, false, false),
("goto", true, false, false),
("", false, false, false),
("public", true, false, false),
("<-", true, false, false),
("", false, false, false),
("is_def_fn", cfg!(not(feature = "no_function")), true, false),
("is_def_var", true, true, false),
("", false, false, false),
("<|", true, false, false),
("::<", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("->", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("module", true, false, false),
("|>", true, false, false),
("", false, false, false),
("void", true, false, false),
("", false, false, false),
("", false, false, false),
("#!", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("?[", cfg!(feature = "no_index"), false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("Fn", true, true, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
(":;", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("++", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("*)", true, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("", false, false, false),
("(*", true, false, false),
];
impl Token {
@@ -2250,7 +2251,7 @@ fn parse_identifier_token(
return (token, start_pos);
}
if is_reserved_keyword_or_symbol(&identifier) {
if is_reserved_keyword_or_symbol(&identifier).0 {
return (Token::Reserved(Box::new(identifier)), start_pos);
}
@@ -2264,30 +2265,6 @@ fn parse_identifier_token(
(Token::Identifier(identifier.into()), start_pos)
}
/// Check whether a keyword may be invoked with function-call syntax.
///
/// # Return values
///
/// A pair of flags:
///
/// * the first is `true` when the keyword can be called normally as a function;
/// * the second is `true` when the keyword can also be called in method-call style.
///
/// Any name that is not one of the recognized keywords yields `(false, false)`.
#[inline]
#[must_use]
pub fn is_keyword_function(name: &str) -> (bool, bool) {
    // Keywords callable both as plain functions and in method-call style.
    if matches!(
        name,
        KEYWORD_TYPE_OF | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY
    ) {
        return (true, true);
    }

    // Keywords callable only as plain functions.
    if matches!(
        name,
        KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_EVAL | KEYWORD_FN_PTR | KEYWORD_IS_DEF_VAR
    ) {
        return (true, false);
    }

    // This keyword is only checked when the `no_function` feature is off.
    #[cfg(not(feature = "no_function"))]
    {
        if name == crate::engine::KEYWORD_IS_DEF_FN {
            return (true, false);
        }
    }

    (false, false)
}
/// _(internals)_ Is a text string a valid identifier?
/// Exported under the `internals` feature only.
#[must_use]
@@ -2313,7 +2290,7 @@ pub fn is_valid_identifier(name: &str) -> bool {
#[must_use]
pub fn is_valid_function_name(name: &str) -> bool {
is_valid_identifier(name)
&& !is_reserved_keyword_or_symbol(name)
&& !is_reserved_keyword_or_symbol(name).0
&& Token::lookup_symbol_from_syntax(name).is_none()
}
@@ -2350,16 +2327,24 @@ pub const fn is_id_continue(x: char) -> bool {
}
/// Is a piece of syntax a reserved keyword or reserved symbol?
///
/// # Return values
///
/// The first `bool` indicates whether it is a reserved keyword or symbol.
///
/// The second `bool` indicates whether the keyword can be called normally as a function.
///
/// The third `bool` indicates whether the keyword can be called in method-call style.
#[inline]
#[must_use]
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool {
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
let utf8 = syntax.as_bytes();
let len = utf8.len();
let rounds = len.min(3);
let mut hash_val = len;
if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
return false;
return (false, false, false);
}
for x in 0..rounds {
@@ -2367,12 +2352,13 @@ pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool {
}
if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
return false;
return (false, false, false);
}
match RESERVED_LIST[hash_val] {
(s, t) if s == syntax => t,
_ => false,
("", ..) => (false, false, false),
(s, true, a, b) => (s == syntax, a, b),
_ => (false, false, false),
}
}