Fine-tune table-driven tokenizing.
This commit is contained in:
288
src/tokenizer.rs
288
src/tokenizer.rs
@@ -1,9 +1,6 @@
|
||||
//! Main module defining the lexer and parser.
|
||||
|
||||
use crate::engine::{
|
||||
Precedence, KEYWORD_DEBUG, KEYWORD_EVAL, KEYWORD_FN_PTR, KEYWORD_FN_PTR_CALL,
|
||||
KEYWORD_FN_PTR_CURRY, KEYWORD_IS_DEF_VAR, KEYWORD_PRINT, KEYWORD_TYPE_OF,
|
||||
};
|
||||
use crate::engine::Precedence;
|
||||
use crate::func::native::OnParseTokenCallback;
|
||||
use crate::{Engine, Identifier, LexError, Position, SmartString, StaticVec, INT, UNSIGNED_INT};
|
||||
use smallvec::SmallVec;
|
||||
@@ -308,7 +305,9 @@ impl fmt::Display for Token {
|
||||
}
|
||||
}
|
||||
|
||||
// Table-driven keyword recognizer generated by GNU gperf.
|
||||
// Table-driven keyword recognizer generated by GNU gperf on the file `tools/keywords.txt`.
|
||||
//
|
||||
// When adding new keywords, make sure to update `tools/keywords.txt` and re-generate this.
|
||||
|
||||
const MIN_KEYWORD_LEN: usize = 1;
|
||||
const MAX_KEYWORD_LEN: usize = 8;
|
||||
@@ -508,7 +507,9 @@ static KEYWORDS_LIST: [(&str, Token); 153] = [
|
||||
("#{", Token::MapStart),
|
||||
];
|
||||
|
||||
// Table-driven reserved symbol recognizer generated by GNU gperf.
|
||||
// Table-driven reserved symbol recognizer generated by GNU gperf on the file `tools/reserved.txt`.
|
||||
//
|
||||
// When adding new reserved symbols, make sure to update `tools/reserved.txt` and re-generate this.
|
||||
|
||||
const MIN_RESERVED_LEN: usize = 1;
|
||||
const MAX_RESERVED_LEN: usize = 10;
|
||||
@@ -530,120 +531,120 @@ static RESERVED_ASSOC_VALUES: [u8; 256] = [
|
||||
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
|
||||
113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
|
||||
];
|
||||
static RESERVED_LIST: [(&str, bool); 113] = [
|
||||
("", false),
|
||||
("~", true),
|
||||
("is", true),
|
||||
("...", true),
|
||||
("", false),
|
||||
("print", true),
|
||||
("@", true),
|
||||
("private", cfg!(feature = "no_function")),
|
||||
("", false),
|
||||
("this", true),
|
||||
("", false),
|
||||
("thread", true),
|
||||
("as", cfg!(feature = "no_module")),
|
||||
("", false),
|
||||
("", false),
|
||||
("spawn", true),
|
||||
("static", true),
|
||||
(":=", true),
|
||||
("===", true),
|
||||
("case", true),
|
||||
("super", true),
|
||||
("shared", true),
|
||||
("package", true),
|
||||
("use", true),
|
||||
("with", true),
|
||||
("curry", true),
|
||||
("$", true),
|
||||
("type_of", true),
|
||||
("nil", true),
|
||||
("sync", true),
|
||||
("yield", true),
|
||||
("import", cfg!(feature = "no_module")),
|
||||
("--", true),
|
||||
("new", true),
|
||||
("exit", true),
|
||||
("async", true),
|
||||
("export", cfg!(feature = "no_module")),
|
||||
("!.", true),
|
||||
("", false),
|
||||
("call", true),
|
||||
("match", true),
|
||||
("", false),
|
||||
("fn", cfg!(feature = "no_function")),
|
||||
("var", true),
|
||||
("null", true),
|
||||
("await", true),
|
||||
("#", true),
|
||||
("default", true),
|
||||
("!==", true),
|
||||
("eval", true),
|
||||
("debug", true),
|
||||
("?", true),
|
||||
("?.", cfg!(feature = "no_object")),
|
||||
("", false),
|
||||
("protected", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("go", true),
|
||||
("", false),
|
||||
("goto", true),
|
||||
("", false),
|
||||
("public", true),
|
||||
("<-", true),
|
||||
("", false),
|
||||
("is_def_fn", cfg!(not(feature = "no_function"))),
|
||||
("is_def_var", true),
|
||||
("", false),
|
||||
("<|", true),
|
||||
("::<", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("->", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("module", true),
|
||||
("|>", true),
|
||||
("", false),
|
||||
("void", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("#!", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("?[", cfg!(feature = "no_index")),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("Fn", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
(":;", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("++", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("*)", true),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("", false),
|
||||
("(*", true),
|
||||
// Lookup table of reserved keywords and symbols, indexed by the hash value
// computed in `is_reserved_keyword_or_symbol`.
//
// Each entry is `(syntax, reserved, fn_call, method_call)`:
// * `syntax`      - the keyword/symbol text; `""` marks a hash slot with no entry.
// * `reserved`    - whether the text is treated as reserved; some entries depend
//                   on crate features via `cfg!`.
// * `fn_call`     - whether the keyword can be called normally as a function.
// * `method_call` - whether the keyword can be called in method-call style.
//
// The position of every entry matters because lookups index directly into this
// array; do not reorder entries by hand.
static RESERVED_LIST: [(&str, bool, bool, bool); 113] = [
|
||||
("", false, false, false),
|
||||
("~", true, false, false),
|
||||
("is", true, false, false),
|
||||
("...", true, false, false),
|
||||
("", false, false, false),
|
||||
("print", true, true, false),
|
||||
("@", true, false, false),
|
||||
("private", cfg!(feature = "no_function"), false, false),
|
||||
("", false, false, false),
|
||||
("this", true, false, false),
|
||||
("", false, false, false),
|
||||
("thread", true, false, false),
|
||||
("as", cfg!(feature = "no_module"), false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("spawn", true, false, false),
|
||||
("static", true, false, false),
|
||||
(":=", true, false, false),
|
||||
("===", true, false, false),
|
||||
("case", true, false, false),
|
||||
("super", true, false, false),
|
||||
("shared", true, false, false),
|
||||
("package", true, false, false),
|
||||
("use", true, false, false),
|
||||
("with", true, false, false),
|
||||
("curry", true, true, true),
|
||||
("$", true, false, false),
|
||||
("type_of", true, true, true),
|
||||
("nil", true, false, false),
|
||||
("sync", true, false, false),
|
||||
("yield", true, false, false),
|
||||
("import", cfg!(feature = "no_module"), false, false),
|
||||
("--", true, false, false),
|
||||
("new", true, false, false),
|
||||
("exit", true, false, false),
|
||||
("async", true, false, false),
|
||||
("export", cfg!(feature = "no_module"), false, false),
|
||||
("!.", true, false, false),
|
||||
("", false, false, false),
|
||||
("call", true, true, true),
|
||||
("match", true, false, false),
|
||||
("", false, false, false),
|
||||
("fn", cfg!(feature = "no_function"), false, false),
|
||||
("var", true, false, false),
|
||||
("null", true, false, false),
|
||||
("await", true, false, false),
|
||||
("#", true, false, false),
|
||||
("default", true, false, false),
|
||||
("!==", true, false, false),
|
||||
("eval", true, true, false),
|
||||
("debug", true, true, false),
|
||||
("?", true, false, false),
|
||||
("?.", cfg!(feature = "no_object"), false, false),
|
||||
("", false, false, false),
|
||||
("protected", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("go", true, false, false),
|
||||
("", false, false, false),
|
||||
("goto", true, false, false),
|
||||
("", false, false, false),
|
||||
("public", true, false, false),
|
||||
("<-", true, false, false),
|
||||
("", false, false, false),
|
||||
("is_def_fn", cfg!(not(feature = "no_function")), true, false),
|
||||
("is_def_var", true, true, false),
|
||||
("", false, false, false),
|
||||
("<|", true, false, false),
|
||||
("::<", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("->", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("module", true, false, false),
|
||||
("|>", true, false, false),
|
||||
("", false, false, false),
|
||||
("void", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("#!", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("?[", cfg!(feature = "no_index"), false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("Fn", true, true, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
(":;", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("++", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("*)", true, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("", false, false, false),
|
||||
("(*", true, false, false),
|
||||
];
|
||||
|
||||
impl Token {
|
||||
@@ -2250,7 +2251,7 @@ fn parse_identifier_token(
|
||||
return (token, start_pos);
|
||||
}
|
||||
|
||||
if is_reserved_keyword_or_symbol(&identifier) {
|
||||
if is_reserved_keyword_or_symbol(&identifier).0 {
|
||||
return (Token::Reserved(Box::new(identifier)), start_pos);
|
||||
}
|
||||
|
||||
@@ -2264,30 +2265,6 @@ fn parse_identifier_token(
|
||||
(Token::Identifier(identifier.into()), start_pos)
|
||||
}
|
||||
|
||||
/// Can a keyword be called like a function?
|
||||
///
|
||||
/// # Return values
|
||||
///
|
||||
/// The first `bool` indicates whether the keyword can be called normally as a function.
|
||||
///
|
||||
/// The second `bool` indicates whether the keyword can be called in method-call style.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn is_keyword_function(name: &str) -> (bool, bool) {
|
||||
match name {
|
||||
KEYWORD_TYPE_OF | KEYWORD_FN_PTR_CALL | KEYWORD_FN_PTR_CURRY => (true, true),
|
||||
|
||||
KEYWORD_PRINT | KEYWORD_DEBUG | KEYWORD_EVAL | KEYWORD_FN_PTR | KEYWORD_IS_DEF_VAR => {
|
||||
(true, false)
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no_function"))]
|
||||
crate::engine::KEYWORD_IS_DEF_FN => (true, false),
|
||||
|
||||
_ => (false, false),
|
||||
}
|
||||
}
|
||||
|
||||
/// _(internals)_ Is a text string a valid identifier?
|
||||
/// Exported under the `internals` feature only.
|
||||
#[must_use]
|
||||
@@ -2313,7 +2290,7 @@ pub fn is_valid_identifier(name: &str) -> bool {
|
||||
#[must_use]
|
||||
pub fn is_valid_function_name(name: &str) -> bool {
|
||||
is_valid_identifier(name)
|
||||
&& !is_reserved_keyword_or_symbol(name)
|
||||
&& !is_reserved_keyword_or_symbol(name).0
|
||||
&& Token::lookup_symbol_from_syntax(name).is_none()
|
||||
}
|
||||
|
||||
@@ -2350,16 +2327,24 @@ pub const fn is_id_continue(x: char) -> bool {
|
||||
}
|
||||
|
||||
/// Is a piece of syntax a reserved keyword or reserved symbol?
|
||||
///
|
||||
/// # Return values
|
||||
///
|
||||
/// The first `bool` indicates whether it is a reserved keyword or symbol.
|
||||
///
|
||||
/// The second `bool` indicates whether the keyword can be called normally as a function.
|
||||
///
|
||||
/// The third `bool` indicates whether the keyword can be called in method-call style.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool {
|
||||
pub fn is_reserved_keyword_or_symbol(syntax: &str) -> (bool, bool, bool) {
|
||||
let utf8 = syntax.as_bytes();
|
||||
let len = utf8.len();
|
||||
let rounds = len.min(3);
|
||||
let mut hash_val = len;
|
||||
|
||||
if !(MIN_RESERVED_LEN..=MAX_RESERVED_LEN).contains(&len) {
|
||||
return false;
|
||||
return (false, false, false);
|
||||
}
|
||||
|
||||
for x in 0..rounds {
|
||||
@@ -2367,12 +2352,13 @@ pub fn is_reserved_keyword_or_symbol(syntax: &str) -> bool {
|
||||
}
|
||||
|
||||
if !(MIN_RESERVED_HASH_VALUE..=MAX_RESERVED_HASH_VALUE).contains(&hash_val) {
|
||||
return false;
|
||||
return (false, false, false);
|
||||
}
|
||||
|
||||
match RESERVED_LIST[hash_val] {
|
||||
(s, t) if s == syntax => t,
|
||||
_ => false,
|
||||
("", ..) => (false, false, false),
|
||||
(s, true, a, b) => (s == syntax, a, b),
|
||||
_ => (false, false, false),
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user