diff --git a/Cargo.toml b/Cargo.toml index f049777e..ec00a10c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ categories = [ "no-std", "embedded", "wasm", "parser-implementations" ] [dependencies] num-traits = { version = "0.2.11", default-features = false } +smallvec = { version = "1.4.1", default-features = false } [features] #default = ["unchecked", "sync", "no_optimize", "no_float", "only_i32", "no_index", "no_object", "no_function", "no_module"] @@ -32,6 +33,7 @@ only_i64 = [] # set INT=i64 (default) and disable support for all other in no_index = [] # no arrays and indexing no_object = [] # no custom objects no_function = [] # no script-defined functions +no_capture = [] # no automatic read/write binding of anonymous function's local variables to its external context no_module = [] # no modules internals = [] # expose internal data structures unicode-xid-ident = ["unicode-xid"] # allow Unicode Standard Annex #31 for identifiers. diff --git a/benches/eval_expression.rs b/benches/eval_expression.rs index ceaaaa17..82fd1b78 100644 --- a/benches/eval_expression.rs +++ b/benches/eval_expression.rs @@ -48,7 +48,7 @@ fn bench_eval_expression_optimized_simple(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; @@ -65,7 +65,7 @@ fn bench_eval_expression_optimized_full(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; @@ -82,7 +82,7 @@ fn bench_eval_call_expression(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= 
#{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; @@ -97,7 +97,7 @@ fn bench_eval_call(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; diff --git a/benches/parsing.rs b/benches/parsing.rs index 8e84bd8d..6bde9dd1 100644 --- a/benches/parsing.rs +++ b/benches/parsing.rs @@ -32,7 +32,7 @@ fn bench_parse_full(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; @@ -109,7 +109,7 @@ fn bench_parse_optimize_simple(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; @@ -125,7 +125,7 @@ fn bench_parse_optimize_full(bench: &mut Bencher) { 2 > 1 && "something" != "nothing" || "2014-01-20" < "Wed Jul 8 23:07:35 MDT 2015" && - [array, with, spaces].len <= #{prop:name}.len && + [array, has, spaces].len <= #{prop:name}.len && modifierTest + 1000 / 2 > (80 * 100 % 2) "#; diff --git a/doc/src/SUMMARY.md b/doc/src/SUMMARY.md index 3592dbc5..78ea9882 100644 --- a/doc/src/SUMMARY.md +++ b/doc/src/SUMMARY.md @@ -79,6 +79,7 @@ The Rhai Scripting Language 4. [Function Pointers](language/fn-ptr.md) 5. [Anonymous Functions](language/fn-anon.md) 6. [Currying](language/fn-curry.md) + 7. [Capturing External Variables](language/fn-closure.md) 16. [Print and Debug](language/print-debug.md) 17. [Modules](language/modules/index.md) 1. 
[Export Variables, Functions and Sub-Modules](language/modules/export.md) diff --git a/doc/src/about/features.md b/doc/src/about/features.md index ffa28cdc..245668b2 100644 --- a/doc/src/about/features.md +++ b/doc/src/about/features.md @@ -14,7 +14,7 @@ Easy * Easily [call a script-defined function]({{rootUrl}}/engine/call-fn.md) from Rust. -* Very few additional dependencies (right now only [`num-traits`](https://crates.io/crates/num-traits/) to do checked arithmetic operations); +* Very few additional dependencies (right now only [`num-traits`](https://crates.io/crates/num-traits/) to do checked arithmetic operations, and [`smallvec`](https://crates.io/crates/smallvec/)); for [`no-std`] builds, a number of additional dependencies are pulled in to provide for functionalities that used to be in `std`. Fast diff --git a/doc/src/engine/dsl.md b/doc/src/engine/dsl.md index 5b381d4a..8f326ed1 100644 --- a/doc/src/engine/dsl.md +++ b/doc/src/engine/dsl.md @@ -15,6 +15,17 @@ The [`Engine::eval_expression_XXX`][`eval_expression`] API can be used to restri a script to expressions only. +Unicode Standard Annex #31 Identifiers +------------------------------------- + +Variable names and other identifiers do not necessarily need to be ASCII-only. + +The [`unicode-xid-ident`] feature, when turned on, causes Rhai to allow variable names and identifiers +that follow [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/). + +This is sometimes useful in a non-English DSL. + + Disable Keywords and/or Operators -------------------------------- diff --git a/doc/src/language/fn-anon.md b/doc/src/language/fn-anon.md index 21ceb7a3..a4cb0fee 100644 --- a/doc/src/language/fn-anon.md +++ b/doc/src/language/fn-anon.md @@ -49,9 +49,13 @@ fn anon_fn_1001(x) { this.data -= x; } fn anon_fn_1002() { print this.data; } ``` + WARNING - NOT Closures ---------------------- Remember: anonymous functions, though having the same syntax as Rust _closures_, are themselves **not** closures. 
In particular, they do not capture their running environment. They are more like Rust's function pointers. + +They do, however, _capture_ variable _values_ from their execution environment, unless the [`no_capture`] +feature is turned on. This is accomplished via [automatic currying][capture]. diff --git a/doc/src/language/fn-closure.md b/doc/src/language/fn-closure.md new file mode 100644 index 00000000..d8749601 --- /dev/null +++ b/doc/src/language/fn-closure.md @@ -0,0 +1,54 @@ +Capture External Variables via Automatic Currying +================================================ + +Poor Man's Closures +------------------- + +Since [anonymous functions] de-sugar to standard function definitions, they retain all the behaviors of +Rhai functions, including being _pure_, having no access to external variables. + +The anonymous function syntax, however, automatically _captures_ variables that are not defined within +the current scope, but are defined in the external scope - i.e. the scope where the anonymous function +is created. + +Variables that are accessible during the time the [anonymous function] is created can be captured, +as long as they are not shadowed by local variables defined within the function's scope. +The values captured are the values of those variables at the time of the [anonymous function]'s creation. + + +New Parameters For Captured Variables +------------------------------------ + +In actual implementation, this de-sugars to: + +1. Keeping track of what variables are accessed inside the anonymous function, + +2. If a variable is not defined within the anonymous function's scope, it is looked up _outside_ the function and in the current execution scope - where the anonymous function is created. + +3. The variable is added to the parameters list of the anonymous function, at the front. + +4. 
The current value of the variable is then [curried][currying] into the [function pointer] itself, essentially carrying that value and inserting it into future calls of the function. + +Automatic currying can be turned off via the [`no_capture`] feature. + + +Examples +-------- + +```rust +let x = 40; + +let f = |y| x + y; // current value of variable 'x' is auto-curried + // the value 40 is curried into 'f' + +x = 1; // 'x' can be changed but the curried value is not + +f.call(2) == 42; // the value of 'x' is still 40 + +// The above de-sugars into this: +fn anon$1001(x, y) { x + y } // parameter 'x' is inserted + +let f = Fn("anon$1001").curry(x); // current value of 'x' is curried + +f.call(2) == 42; +``` diff --git a/doc/src/language/fn-curry.md b/doc/src/language/fn-curry.md index 8ee103a9..71c8933a 100644 --- a/doc/src/language/fn-curry.md +++ b/doc/src/language/fn-curry.md @@ -28,3 +28,12 @@ let curried = curry(func, 21); // function-call style also works curried.call(2) == 42; // <- de-sugars to 'func.call(21, 2)' // only one argument is now required ``` + + +Automatic Currying +------------------ + +[Anonymous functions] defined via a closure syntax _capture_ external variables that are not shadowed inside +the function's scope. + +This is accomplished via [automatic currying]. diff --git a/doc/src/language/oop.md b/doc/src/language/oop.md index e34a8ab2..09bc7b55 100644 --- a/doc/src/language/oop.md +++ b/doc/src/language/oop.md @@ -21,6 +21,18 @@ When a property of an [object map] is called like a method function, and if it h a valid [function pointer] (perhaps defined via an [anonymous function]), then the call will be dispatched to the actual function with `this` binding to the [object map] itself. + +Use Anonymous Functions to Define Methods +---------------------------------------- + +[Anonymous functions] defined as values for [object map] properties take on a syntactic shape +that resembles very closely that of class methods in an OOP language. 
+ +Anonymous functions can also _capture_ variables from the defining environment, which is a very +common OOP pattern. Capturing is accomplished via a feature called _[automatic currying]_ and +can be turned off via the [`no_capture`] feature. + + Examples -------- diff --git a/doc/src/links.md b/doc/src/links.md index 936c714d..19fdf0e9 100644 --- a/doc/src/links.md +++ b/doc/src/links.md @@ -9,6 +9,7 @@ [`no_object`]: {{rootUrl}}/start/features.md [`no_function`]: {{rootUrl}}/start/features.md [`no_module`]: {{rootUrl}}/start/features.md +[`no_capture`]: {{rootUrl}}/start/features.md [`no_std`]: {{rootUrl}}/start/features.md [`no-std`]: {{rootUrl}}/start/features.md [`internals`]: {{rootUrl}}/start/features.md @@ -78,6 +79,8 @@ [function pointer]: {{rootUrl}}/language/fn-ptr.md [function pointers]: {{rootUrl}}/language/fn-ptr.md [currying]: {{rootUrl}}/language/fn-curry.md +[capture]: {{rootUrl}}/language/fn-closure.md +[automatic currying]: {{rootUrl}}/language/fn-closure.md [function namespace]: {{rootUrl}}/language/fn-namespaces.md [function namespaces]: {{rootUrl}}/language/fn-namespaces.md [anonymous function]: {{rootUrl}}/language/fn-anon.md diff --git a/doc/src/start/features.md b/doc/src/start/features.md index 0f6b0f53..7a06618a 100644 --- a/doc/src/start/features.md +++ b/doc/src/start/features.md @@ -23,6 +23,7 @@ more control over what a script can (or cannot) do. | `no_object` | Disable support for [custom types] and [object maps]. | | `no_function` | Disable script-defined [functions]. | | `no_module` | Disable loading external [modules]. | +| `no_capture` | Disable capturing external variables in [anonymous functions]. | | `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. | | `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. 
| | `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. | diff --git a/src/engine.rs b/src/engine.rs index e8cf5c3c..7806382f 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -445,7 +445,7 @@ pub fn search_imports<'s>( state: &mut State, modules: &Box, ) -> Result<&'s Module, Box> { - let (root, root_pos) = modules.get(0); + let (root, root_pos) = &modules[0]; // Qualified - check if the root module is directly indexed let index = if state.always_search { @@ -478,7 +478,7 @@ pub fn search_imports_mut<'s>( state: &mut State, modules: &Box, ) -> Result<&'s mut Module, Box> { - let (root, root_pos) = modules.get(0); + let (root, root_pos) = &modules[0]; // Qualified - check if the root module is directly indexed let index = if state.always_search { @@ -652,7 +652,7 @@ impl Engine { }; // Pop the last index value - let idx_val = idx_values.pop(); + let idx_val = idx_values.pop().unwrap(); match chain_type { #[cfg(not(feature = "no_index"))] @@ -1007,7 +1007,7 @@ impl Engine { idx_values.push(Dynamic::from(arg_values)); } Expr::FnCall(_) => unreachable!(), - Expr::Property(_) => idx_values.push(()), // Store a placeholder - no need to copy the property name + Expr::Property(_) => idx_values.push(().into()), // Store a placeholder - no need to copy the property name Expr::Index(x) | Expr::Dot(x) => { let (lhs, rhs, _) = x.as_ref(); diff --git a/src/lib.rs b/src/lib.rs index 96c514fa..1ba4cda1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,23 +48,7 @@ //! } //! ``` //! -//! ## Optional features -//! -//! | Feature | Description | -//! | ------------- | ----------------------------------------------------------------------------------------------------------------------------------| -//! | `unchecked` | Exclude arithmetic checking (such as overflows and division by zero). Beware that a bad script may panic the entire system! | -//! 
| `no_function` | Disable script-defined functions if not needed. | -//! | `no_module` | Disable loading external modules if not needed. | -//! | `no_index` | Disable arrays and indexing features if not needed. | -//! | `no_object` | Disable support for custom types and objects. | -//! | `no_float` | Disable floating-point numbers and math if not needed. | -//! | `no_optimize` | Disable the script optimizer. | -//! | `only_i32` | Set the system integer type to `i32` and disable all other integer types. `INT` is set to `i32`. | -//! | `only_i64` | Set the system integer type to `i64` and disable all other integer types. `INT` is set to `i64`. | -//! | `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. | -//! | `sync` | Restrict all values types to those that are `Send + Sync`. Under this feature, `Engine`, `Scope` and `AST` are all `Send + Sync`. | -//! | `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. | -//! | `internals` | Expose internal data structures (beware they may be volatile from version to version). | +//! # Documentation //! //! See [The Rhai Book](https://schungx.github.io/rhai) for details on the Rhai script engine and language. diff --git a/src/optimize.rs b/src/optimize.rs index 27f6f816..b46b7488 100644 --- a/src/optimize.rs +++ b/src/optimize.rs @@ -436,7 +436,7 @@ fn optimize_expr(expr: Expr, state: &mut State) -> Expr { // Array literal where everything is pure - promote the indexed item. // All other items can be thrown away. 
state.set_dirty(); - let mut expr = a.0.take(i.0 as usize); + let mut expr = a.0.remove(i.0 as usize); expr.set_position(a.1); expr } diff --git a/src/parser.rs b/src/parser.rs index 417c35fb..3068bad0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -15,6 +15,9 @@ use crate::utils::{StaticVec, StraightHasherBuilder}; #[cfg(not(feature = "no_function"))] use crate::engine::FN_ANONYMOUS; +#[cfg(not(feature = "no_capture"))] +use crate::engine::KEYWORD_FN_PTR_CURRY; + #[cfg(not(feature = "no_object"))] use crate::engine::{make_getter, make_setter}; @@ -405,6 +408,9 @@ struct ParseState<'e> { engine: &'e Engine, /// Encapsulates a local stack with variable names to simulate an actual runtime scope. stack: Vec<(String, ScopeEntryType)>, + /// Tracks a list of external variables (variables that are not explicitly declared in the scope). + #[cfg(not(feature = "no_capture"))] + externals: HashMap, /// Encapsulates a local stack with variable names to simulate an actual runtime scope. modules: Vec, /// Maximum levels of expression nesting. @@ -424,30 +430,45 @@ impl<'e> ParseState<'e> { ) -> Self { Self { engine, - stack: Default::default(), - modules: Default::default(), #[cfg(not(feature = "unchecked"))] max_expr_depth, #[cfg(not(feature = "unchecked"))] max_function_expr_depth, + #[cfg(not(feature = "no_capture"))] + externals: Default::default(), + stack: Default::default(), + modules: Default::default(), } } - /// Find a variable by name in the `ParseState`, searching in reverse. + + /// Find an explicitly declared variable by name in the `ParseState`, searching in reverse order. + /// + /// If the variable is not present in the scope, it is added to the list of external variables. + /// /// The return value is the offset to be deducted from `Stack::len`, /// i.e. the top element of the `ParseState` is offset 1. - /// Return zero when the variable name is not found in the `ParseState`. 
- pub fn find_var(&self, name: &str) -> Option { - self.stack + /// Return `None` when the variable name is not found in the `stack`. + fn access_var(&mut self, name: &str, pos: Position) -> Option { + let index = self + .stack .iter() .rev() .enumerate() .find(|(_, (n, _))| *n == name) - .and_then(|(i, _)| NonZeroUsize::new(i + 1)) + .and_then(|(i, _)| NonZeroUsize::new(i + 1)); + + #[cfg(not(feature = "no_capture"))] + if index.is_none() && !self.externals.contains_key(name) { + self.externals.insert(name.to_string(), pos); + } + + index } + /// Find a module by name in the `ParseState`, searching in reverse. /// The return value is the offset to be deducted from `Stack::len`, /// i.e. the top element of the `ParseState` is offset 1. - /// Return zero when the variable name is not found in the `ParseState`. + /// Return `None` when the module name is not found in the `ParseState`. pub fn find_module(&self, name: &str) -> Option { self.modules .iter() @@ -1103,7 +1124,7 @@ fn parse_fn_call( eat_token(input, Token::RightParen); let hash_script = if let Some(modules) = modules.as_mut() { - modules.set_index(state.find_module(&modules.get(0).0)); + modules.set_index(state.find_module(&modules[0].0)); // Rust functions are indexed in two steps: // 1) Calculate a hash in a similar manner to script-defined functions, @@ -1145,7 +1166,7 @@ fn parse_fn_call( eat_token(input, Token::RightParen); let hash_script = if let Some(modules) = modules.as_mut() { - modules.set_index(state.find_module(&modules.get(0).0)); + modules.set_index(state.find_module(&modules[0].0)); // Rust functions are indexed in two steps: // 1) Calculate a hash in a similar manner to script-defined functions, @@ -1577,7 +1598,7 @@ fn parse_primary( Token::CharConstant(c) => Expr::CharConstant(Box::new((c, settings.pos))), Token::StringConstant(s) => Expr::StringConstant(Box::new((s.into(), settings.pos))), Token::Identifier(s) => { - let index = state.find_var(&s); + let index = state.access_var(&s, 
settings.pos); Expr::Variable(Box::new(((s, settings.pos), None, 0, index))) } // Function call is allowed to have reserved keyword @@ -1678,7 +1699,7 @@ fn parse_primary( // Qualifiers + variable name *hash = calc_fn_hash(modules.iter().map(|(v, _)| v.as_str()), name, 0, empty()); - modules.set_index(state.find_module(&modules.get(0).0)); + modules.set_index(state.find_module(&modules[0].0)); } _ => (), } @@ -1778,7 +1799,7 @@ fn parse_unary( // | ... #[cfg(not(feature = "no_function"))] Token::Pipe | Token::Or => { - let mut state = ParseState::new( + let mut new_state = ParseState::new( state.engine, #[cfg(not(feature = "unchecked"))] state.max_function_expr_depth, @@ -1797,7 +1818,12 @@ fn parse_unary( pos: *token_pos, }; - let (expr, func) = parse_anon_fn(input, &mut state, lib, settings)?; + let (expr, func) = parse_anon_fn(input, &mut new_state, lib, settings)?; + + #[cfg(not(feature = "no_capture"))] + new_state.externals.iter().for_each(|(closure, pos)| { + state.access_var(closure, *pos); + }); // Qualifiers (none) + function name + number of arguments. 
let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); @@ -1936,7 +1962,7 @@ fn make_dot_expr(lhs: Expr, rhs: Expr, op_pos: Position) -> Result { - return Err(PERR::PropertyExpected.into_err(x.1.unwrap().get(0).1)); + return Err(PERR::PropertyExpected.into_err(x.1.unwrap()[0].1)); } // lhs.prop (lhs, prop @ Expr::Property(_)) => Expr::Dot(Box::new((lhs, prop, op_pos))), @@ -2197,25 +2223,25 @@ fn parse_binary_op( | Token::GreaterThanEqualsTo => Expr::FnCall(Box::new((op, None, hash, args, cmp_def))), Token::Or => { - let rhs = args.pop(); - let current_lhs = args.pop(); + let rhs = args.pop().unwrap(); + let current_lhs = args.pop().unwrap(); Expr::Or(Box::new((current_lhs, rhs, pos))) } Token::And => { - let rhs = args.pop(); - let current_lhs = args.pop(); + let rhs = args.pop().unwrap(); + let current_lhs = args.pop().unwrap(); Expr::And(Box::new((current_lhs, rhs, pos))) } Token::In => { - let rhs = args.pop(); - let current_lhs = args.pop(); + let rhs = args.pop().unwrap(); + let current_lhs = args.pop().unwrap(); make_in_expr(current_lhs, rhs, pos)? } #[cfg(not(feature = "no_object"))] Token::Period => { - let rhs = args.pop(); - let current_lhs = args.pop(); + let rhs = args.pop().unwrap(); + let current_lhs = args.pop().unwrap(); make_dot_expr(current_lhs, rhs, pos)? 
} @@ -3024,6 +3050,46 @@ fn parse_fn( }) } +/// Creates a curried expression from a list of external variables +#[cfg(not(feature = "no_capture"))] +fn make_curry_from_externals( + fn_expr: Expr, + state: &mut ParseState, + settings: &ParseSettings, +) -> Expr { + if state.externals.is_empty() { + return fn_expr; + } + + let mut args: StaticVec<_> = Default::default(); + + state.externals.iter().for_each(|(var_name, pos)| { + args.push(Expr::Variable(Box::new(( + (var_name.clone(), *pos), + None, + 0, + None, + )))); + }); + + let hash = calc_fn_hash( + empty(), + KEYWORD_FN_PTR_CURRY, + state.externals.len(), + empty(), + ); + + let fn_call = Expr::FnCall(Box::new(( + (KEYWORD_FN_PTR_CURRY.into(), false, settings.pos), + None, + hash, + args, + None, + ))); + + Expr::Dot(Box::new((fn_expr, fn_call, settings.pos))) +} + /// Parse an anonymous function definition. #[cfg(not(feature = "no_function"))] fn parse_anon_fn( @@ -3091,7 +3157,17 @@ fn parse_anon_fn( let body = parse_stmt(input, state, lib, settings.level_up()) .map(|stmt| stmt.unwrap_or_else(|| Stmt::Noop(pos)))?; - let params: StaticVec<_> = params.into_iter().map(|(p, _)| p).collect(); + #[cfg(feature = "no_capture")] + let params: StaticVec<_> = params.into_iter().map(|(v, _)| v).collect(); + + // Add parameters that are auto-curried + #[cfg(not(feature = "no_capture"))] + let params: StaticVec<_> = state + .externals + .keys() + .cloned() + .chain(params.into_iter().map(|(v, _)| v)) + .collect(); // Calculate hash #[cfg(feature = "no_std")] @@ -3117,6 +3193,9 @@ fn parse_anon_fn( let expr = Expr::FnPointer(Box::new((fn_name, settings.pos))); + #[cfg(not(feature = "no_capture"))] + let expr = make_curry_from_externals(expr, state, &settings); + Ok((expr, script)) } @@ -3128,7 +3207,6 @@ impl Engine { optimization_level: OptimizationLevel, ) -> Result { let mut functions = Default::default(); - let mut state = ParseState::new( self, #[cfg(not(feature = "unchecked"))] @@ -3174,7 +3252,6 @@ impl Engine { ) 
-> Result<(Vec, Vec), ParseError> { let mut statements: Vec = Default::default(); let mut functions = Default::default(); - let mut state = ParseState::new( self, #[cfg(not(feature = "unchecked"))] diff --git a/src/token.rs b/src/token.rs index 77187304..7d1a07ba 100644 --- a/src/token.rs +++ b/src/token.rs @@ -21,7 +21,6 @@ use crate::stdlib::{ iter::Peekable, str::{Chars, FromStr}, string::{String, ToString}, - vec::Vec, }; type LERR = LexError; @@ -747,8 +746,8 @@ pub fn parse_string_literal( pos: &mut Position, enclosing_char: char, ) -> Result { - let mut result = Vec::new(); - let mut escape = String::with_capacity(12); + let mut result: StaticVec = Default::default(); + let mut escape: StaticVec = Default::default(); loop { let next_char = stream.get_next().ok_or((LERR::UnterminatedString, *pos))?; @@ -787,8 +786,8 @@ pub fn parse_string_literal( // \x??, \u????, \U???????? ch @ 'x' | ch @ 'u' | ch @ 'U' if !escape.is_empty() => { let mut seq = escape.clone(); - seq.push(ch); escape.clear(); + seq.push(ch); let mut out_val: u32 = 0; let len = match ch { @@ -799,23 +798,31 @@ pub fn parse_string_literal( }; for _ in 0..len { - let c = stream - .get_next() - .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; + let c = stream.get_next().ok_or_else(|| { + ( + LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), + *pos, + ) + })?; seq.push(c); pos.advance(); out_val *= 16; - out_val += c - .to_digit(16) - .ok_or_else(|| (LERR::MalformedEscapeSequence(seq.to_string()), *pos))?; + out_val += c.to_digit(16).ok_or_else(|| { + ( + LERR::MalformedEscapeSequence(seq.iter().cloned().collect()), + *pos, + ) + })?; } - result.push( - char::from_u32(out_val) - .ok_or_else(|| (LERR::MalformedEscapeSequence(seq), *pos))?, - ); + result.push(char::from_u32(out_val).ok_or_else(|| { + ( + LERR::MalformedEscapeSequence(seq.into_iter().collect()), + *pos, + ) + })?); } // \{enclosing_char} - escaped @@ -828,7 +835,12 @@ pub fn parse_string_literal( 
ch if enclosing_char == ch && escape.is_empty() => break, // Unknown escape sequence - _ if !escape.is_empty() => return Err((LERR::MalformedEscapeSequence(escape), *pos)), + _ if !escape.is_empty() => { + return Err(( + LERR::MalformedEscapeSequence(escape.into_iter().collect()), + *pos, + )) + } // Cannot have new-lines inside string literals '\n' => { @@ -983,7 +995,7 @@ fn get_next_token_inner( // digit ... ('0'..='9', _) => { - let mut result = Vec::new(); + let mut result: StaticVec = Default::default(); let mut radix_base: Option = None; result.push(c); @@ -1385,7 +1397,7 @@ fn get_identifier( start_pos: Position, first_char: char, ) -> Option<(Token, Position)> { - let mut result = Vec::new(); + let mut result: StaticVec<_> = Default::default(); result.push(first_char); while let Some(next_char) = stream.peek_next() { @@ -1400,7 +1412,7 @@ fn get_identifier( let is_valid_identifier = is_valid_identifier(result.iter().cloned()); - let identifier: String = result.into_iter().collect(); + let identifier = result.into_iter().collect(); if !is_valid_identifier { return Some(( diff --git a/src/utils.rs b/src/utils.rs index 760c809e..13e11704 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,8 +1,4 @@ //! Module containing various utility types and functions. -//! -//! # Safety -//! -//! The `StaticVec` type has some `unsafe` blocks to handle conversions between `MaybeUninit` and regular types. 
use crate::fn_native::{shared_make_mut, shared_take, Shared}; @@ -13,12 +9,9 @@ use crate::stdlib::{ fmt, hash::{BuildHasher, Hash, Hasher}, iter::FromIterator, - mem, - mem::MaybeUninit, - ops::{Add, AddAssign, Deref, DerefMut, Drop, Index, IndexMut}, + ops::{Add, AddAssign, Deref}, str::FromStr, string::{String, ToString}, - vec::Vec, }; #[cfg(not(feature = "no_std"))] @@ -27,6 +20,8 @@ use crate::stdlib::collections::hash_map::DefaultHasher; #[cfg(feature = "no_std")] use ahash::AHasher; +use smallvec::SmallVec; + /// A hasher that only takes one single `u64` and returns it as a hash key. /// /// # Panics @@ -92,549 +87,10 @@ pub fn calc_fn_spec<'a>( s.finish() } -/// [INTERNALS] An array-like type that holds a number of values in static storage for no-allocation, quick access. +/// [INTERNALS] Alias to [`smallvec::SmallVec<[T; 4]>`](https://crates.io/crates/smallvec), +/// which is a specialized `Vec` backed by a small, fixed-size array when there are <= 4 items stored. /// Exported under the `internals` feature only. -/// -/// If too many items are stored, it converts into using a `Vec`. -/// -/// -/// This is essentially a knock-off of the [`staticvec`](https://crates.io/crates/staticvec) crate. -/// This simplified implementation here is to avoid pulling in another crate. -/// -/// # Implementation -/// -/// A `StaticVec` holds data in _either one_ of two storages: 1) a fixed-size array of `MAX_STATIC_VEC` -/// items, and 2) a dynamic `Vec`. At any time, either one of them (or both) must be empty, depending on the -/// total number of items. -/// -/// There is a `len` field containing the total number of items held by the `StaticVec`. -/// -/// The fixed-size array (`list`) is not initialized (i.e. initialized with `MaybeUninit::uninit()`). -/// -/// When `len <= MAX_STATIC_VEC`, all elements are stored in the fixed-size array. -/// Array slots `>= len` are `MaybeUninit::uninit()` while slots `< len` are considered actual data. 
-/// In this scenario, the `Vec` (`more`) is empty. -/// -/// As soon as we try to push a new item into the `StaticVec` that makes the total number exceed -/// `MAX_STATIC_VEC`, all the items in the fixed-sized array are taken out, replaced with -/// `MaybeUninit::uninit()` (via `mem::replace`) and pushed into the `Vec`. -/// Then the new item is added to the `Vec`. -/// -/// Therefore, if `len > MAX_STATIC_VEC`, then the fixed-size array (`list`) is considered -/// empty and uninitialized while all data resides in the `Vec` (`more`). -/// -/// When popping an item off of the `StaticVec`, the reverse is true. When `len = MAX_STATIC_VEC + 1`, -/// after popping the item, all the items residing in the `Vec` are moved back to the fixed-size array (`list`). -/// The `Vec` will then be empty. -/// -/// Therefore, if `len <= MAX_STATIC_VEC`, data is in the fixed-size array (`list`). -/// Otherwise, data is in the `Vec` (`more`). -/// -/// # Safety -/// -/// This type uses some unsafe code (mainly for uninitialized/unused array slots) for efficiency. -/// -/// ## WARNING -/// -/// This type is volatile and may change. -// -// TODO - remove unsafe code -pub struct StaticVec { - /// Total number of values held. - len: usize, - /// Fixed-size storage for fast, no-allocation access. - list: [MaybeUninit; MAX_STATIC_VEC], - /// Dynamic storage. For spill-overs. - more: Vec, -} - -/// Maximum slots of fixed-size storage for a `StaticVec`. -/// 4 slots should be enough for most cases. 
-const MAX_STATIC_VEC: usize = 4; - -impl Drop for StaticVec { - fn drop(&mut self) { - self.clear(); - } -} - -impl Hash for StaticVec { - fn hash(&self, state: &mut H) { - self.iter().for_each(|x| x.hash(state)); - } -} - -impl Default for StaticVec { - fn default() -> Self { - Self { - len: 0, - list: unsafe { mem::MaybeUninit::uninit().assume_init() }, - more: Vec::new(), - } - } -} - -impl PartialEq for StaticVec { - fn eq(&self, other: &Self) -> bool { - if self.len != other.len || self.more != other.more { - return false; - } - - if self.len > MAX_STATIC_VEC { - return true; - } - - unsafe { - mem::transmute::<_, &[T; MAX_STATIC_VEC]>(&self.list) - == mem::transmute::<_, &[T; MAX_STATIC_VEC]>(&other.list) - } - } -} - -impl Clone for StaticVec { - fn clone(&self) -> Self { - let mut value: Self = Default::default(); - value.len = self.len; - - if self.is_fixed_storage() { - for x in 0..self.len { - let item = self.list.get(x).unwrap(); - let item_value = unsafe { mem::transmute::<_, &T>(item) }; - value.list[x] = MaybeUninit::new(item_value.clone()); - } - } else { - value.more = self.more.clone(); - } - - value - } -} - -impl Eq for StaticVec {} - -impl FromIterator for StaticVec { - fn from_iter>(iter: X) -> Self { - let mut vec = StaticVec::new(); - - for x in iter { - vec.push(x); - } - - vec - } -} - -impl IntoIterator for StaticVec { - type Item = T; - type IntoIter = Box>; - - fn into_iter(self) -> Self::IntoIter { - self.into_iter() - } -} - -impl StaticVec { - /// Create a new `StaticVec`. - pub fn new() -> Self { - Default::default() - } - /// Empty the `StaticVec`. - pub fn clear(&mut self) { - if self.is_fixed_storage() { - for x in 0..self.len { - self.extract_from_list(x); - } - } else { - self.more.clear(); - } - self.len = 0; - } - /// Extract a `MaybeUninit` into a concrete initialized type. 
- fn extract(value: MaybeUninit) -> T { - unsafe { value.assume_init() } - } - /// Extract an item from the fixed-size array, replacing it with `MaybeUninit::uninit()`. - /// - /// # Panics - /// - /// Panics if fixed-size storage is not used, or if the `index` is out of bounds. - fn extract_from_list(&mut self, index: usize) -> T { - if !self.is_fixed_storage() { - panic!("not fixed storage in StaticVec"); - } - if index >= self.len { - panic!("index OOB in StaticVec"); - } - Self::extract(mem::replace( - self.list.get_mut(index).unwrap(), - MaybeUninit::uninit(), - )) - } - /// Set an item into the fixed-size array. - /// If `drop` is `true`, the original value is extracted then automatically dropped. - /// - /// # Panics - /// - /// Panics if fixed-size storage is not used, or if the `index` is out of bounds. - fn set_into_list(&mut self, index: usize, value: T, drop: bool) { - if !self.is_fixed_storage() { - panic!("not fixed storage in StaticVec"); - } - // Allow setting at most one slot to the right - if index > self.len { - panic!("index OOB in StaticVec"); - } - let temp = mem::replace(self.list.get_mut(index).unwrap(), MaybeUninit::new(value)); - if drop { - // Extract the original value - which will drop it automatically - Self::extract(temp); - } - } - /// Move item in the fixed-size array into the `Vec`. - /// - /// # Panics - /// - /// Panics if fixed-size storage is not used, or if the fixed-size storage is not full. - fn move_fixed_into_vec(&mut self, num: usize) { - if !self.is_fixed_storage() { - panic!("not fixed storage in StaticVec"); - } - if self.len != num { - panic!("fixed storage is not full in StaticVec"); - } - self.more.extend( - self.list - .iter_mut() - .take(num) - .map(|v| mem::replace(v, MaybeUninit::uninit())) - .map(Self::extract), - ); - } - /// Is data stored in fixed-size storage? - fn is_fixed_storage(&self) -> bool { - self.len <= MAX_STATIC_VEC - } - /// Push a new value to the end of this `StaticVec`. 
- pub fn push>(&mut self, value: X) { - if self.len == MAX_STATIC_VEC { - self.move_fixed_into_vec(MAX_STATIC_VEC); - self.more.push(value.into()); - } else if self.is_fixed_storage() { - self.set_into_list(self.len, value.into(), false); - } else { - self.more.push(value.into()); - } - self.len += 1; - } - /// Insert a new value to this `StaticVec` at a particular position. - /// - /// # Panics - /// - /// Panics if `index` is out of bounds. - pub fn insert>(&mut self, index: usize, value: X) { - if index > self.len { - panic!("index OOB in StaticVec"); - } - - if self.len == MAX_STATIC_VEC { - self.move_fixed_into_vec(MAX_STATIC_VEC); - self.more.insert(index, value.into()); - } else if self.is_fixed_storage() { - // Move all items one slot to the right - for x in (index..self.len).rev() { - let orig_value = self.extract_from_list(x); - self.set_into_list(x + 1, orig_value, false); - } - self.set_into_list(index, value.into(), false); - } else { - self.more.insert(index, value.into()); - } - self.len += 1; - } - /// Pop a value from the end of this `StaticVec`. - /// - /// # Panics - /// - /// Panics if the `StaticVec` is empty. - pub fn pop(&mut self) -> T { - if self.is_empty() { - panic!("nothing to pop!"); - } - - if self.is_fixed_storage() { - let value = self.extract_from_list(self.len - 1); - self.len -= 1; - value - } else { - let value = self.more.pop().unwrap(); - self.len -= 1; - - // Move back to the fixed list - if self.more.len() == MAX_STATIC_VEC { - for index in (0..MAX_STATIC_VEC).rev() { - let item = self.more.pop().unwrap(); - self.set_into_list(index, item, false); - } - } - - value - } - } - /// Remove a value from this `StaticVec` at a particular position. - /// - /// # Panics - /// - /// Panics if `index` is out of bounds. 
- pub fn remove(&mut self, index: usize) -> T { - if index >= self.len { - panic!("index OOB in StaticVec"); - } - - if self.is_fixed_storage() { - let value = self.extract_from_list(index); - - // Move all items one slot to the left - for x in index + 1..self.len { - let orig_value = self.extract_from_list(x); - self.set_into_list(x - 1, orig_value, false); - } - self.len -= 1; - - value - } else { - let value = self.more.remove(index); - self.len -= 1; - - // Move back to the fixed list - if self.more.len() == MAX_STATIC_VEC { - for index in (0..MAX_STATIC_VEC).rev() { - let item = self.more.pop().unwrap(); - self.set_into_list(index, item, false); - } - } - - value - } - } - /// Get the number of items in this `StaticVec`. - pub fn len(&self) -> usize { - self.len - } - /// Is this `StaticVec` empty? - pub fn is_empty(&self) -> bool { - self.len == 0 - } - /// Get a reference to the item at a particular index. - /// - /// # Panics - /// - /// Panics if `index` is out of bounds. - pub fn get(&self, index: usize) -> &T { - if index >= self.len { - panic!("index OOB in StaticVec"); - } - - let list = unsafe { mem::transmute::<_, &[T; MAX_STATIC_VEC]>(&self.list) }; - - if self.is_fixed_storage() { - list.get(index).unwrap() - } else { - self.more.get(index).unwrap() - } - } - /// Get a mutable reference to the item at a particular index. - /// - /// # Panics - /// - /// Panics if `index` is out of bounds. - pub fn get_mut(&mut self, index: usize) -> &mut T { - if index >= self.len { - panic!("index OOB in StaticVec"); - } - - let list = unsafe { mem::transmute::<_, &mut [T; MAX_STATIC_VEC]>(&mut self.list) }; - - if self.is_fixed_storage() { - list.get_mut(index).unwrap() - } else { - self.more.get_mut(index).unwrap() - } - } - /// Get an iterator to entries in the `StaticVec`. 
- pub fn iter(&self) -> impl Iterator { - let list = unsafe { mem::transmute::<_, &[T; MAX_STATIC_VEC]>(&self.list) }; - - if self.is_fixed_storage() { - list[..self.len].iter() - } else { - self.more.iter() - } - } - /// Get a mutable iterator to entries in the `StaticVec`. - pub fn iter_mut(&mut self) -> impl Iterator { - let list = unsafe { mem::transmute::<_, &mut [T; MAX_STATIC_VEC]>(&mut self.list) }; - - if self.is_fixed_storage() { - list[..self.len].iter_mut() - } else { - self.more.iter_mut() - } - } -} - -impl StaticVec { - /// Get a mutable iterator to entries in the `StaticVec`. - pub fn into_iter(mut self) -> Box> { - if self.is_fixed_storage() { - let mut it = FixedStorageIterator { - data: unsafe { mem::MaybeUninit::uninit().assume_init() }, - index: 0, - limit: self.len, - }; - - for x in 0..self.len { - it.data[x] = mem::replace(self.list.get_mut(x).unwrap(), MaybeUninit::uninit()); - } - self.len = 0; - - Box::new(it) - } else { - Box::new(Vec::from(self).into_iter()) - } - } -} - -/// An iterator that takes control of the fixed-size storage of a `StaticVec` and returns its values. -struct FixedStorageIterator { - data: [MaybeUninit; MAX_STATIC_VEC], - index: usize, - limit: usize, -} - -impl Iterator for FixedStorageIterator { - type Item = T; - - fn next(&mut self) -> Option { - if self.index >= self.limit { - None - } else { - self.index += 1; - - let value = mem::replace( - self.data.get_mut(self.index - 1).unwrap(), - MaybeUninit::uninit(), - ); - - unsafe { Some(value.assume_init()) } - } - } -} - -impl StaticVec { - /// Get the item at a particular index, replacing it with the default. - /// - /// # Panics - /// - /// Panics if `index` is out of bounds. 
- pub fn take(&mut self, index: usize) -> T { - if index >= self.len { - panic!("index OOB in StaticVec"); - } - - mem::take(if self.is_fixed_storage() { - unsafe { mem::transmute(self.list.get_mut(index).unwrap()) } - } else { - self.more.get_mut(index).unwrap() - }) - } -} - -impl fmt::Debug for StaticVec { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Debug::fmt(&self.iter().collect::>(), f) - } -} - -impl AsRef<[T]> for StaticVec { - fn as_ref(&self) -> &[T] { - let list = unsafe { mem::transmute::<_, &[T; MAX_STATIC_VEC]>(&self.list) }; - - if self.is_fixed_storage() { - &list[..self.len] - } else { - &self.more[..] - } - } -} - -impl AsMut<[T]> for StaticVec { - fn as_mut(&mut self) -> &mut [T] { - let list = unsafe { mem::transmute::<_, &mut [T; MAX_STATIC_VEC]>(&mut self.list) }; - - if self.is_fixed_storage() { - &mut list[..self.len] - } else { - &mut self.more[..] - } - } -} - -impl Deref for StaticVec { - type Target = [T]; - fn deref(&self) -> &Self::Target { - self.as_ref() - } -} - -impl DerefMut for StaticVec { - fn deref_mut(&mut self) -> &mut Self::Target { - self.as_mut() - } -} - -impl Index for StaticVec { - type Output = T; - - fn index(&self, index: usize) -> &Self::Output { - self.get(index) - } -} - -impl IndexMut for StaticVec { - fn index_mut(&mut self, index: usize) -> &mut Self::Output { - self.get_mut(index) - } -} - -impl From> for Vec { - fn from(mut value: StaticVec) -> Self { - if value.len <= MAX_STATIC_VEC { - value.move_fixed_into_vec(value.len); - } - value.len = 0; - - let mut arr = Self::new(); - arr.append(&mut value.more); - arr - } -} - -impl From> for StaticVec { - fn from(mut value: Vec) -> Self { - let mut arr: Self = Default::default(); - arr.len = value.len(); - - if arr.len <= MAX_STATIC_VEC { - for x in (0..arr.len).rev() { - arr.set_into_list(x, value.pop().unwrap(), false); - } - } else { - arr.more = value; - } - - arr - } -} +pub type StaticVec = SmallVec<[T; 4]>; /// The system immutable 
string type. /// diff --git a/tests/call_fn.rs b/tests/call_fn.rs index e75d93ec..0f9087d2 100644 --- a/tests/call_fn.rs +++ b/tests/call_fn.rs @@ -83,38 +83,6 @@ fn test_call_fn_private() -> Result<(), Box> { Ok(()) } -#[test] -fn test_anonymous_fn() -> Result<(), Box> { - let calc_func = Func::<(INT, INT, INT), INT>::create_from_script( - Engine::new(), - "fn calc(x, y, z,) { (x + y) * z }", - "calc", - )?; - - assert_eq!(calc_func(42, 123, 9)?, 1485); - - let calc_func = Func::<(INT, String, INT), INT>::create_from_script( - Engine::new(), - "fn calc(x, y, z) { (x + len(y)) * z }", - "calc", - )?; - - assert_eq!(calc_func(42, "hello".to_string(), 9)?, 423); - - let calc_func = Func::<(INT, INT, INT), INT>::create_from_script( - Engine::new(), - "private fn calc(x, y, z) { (x + y) * z }", - "calc", - )?; - - assert!(matches!( - *calc_func(42, 123, 9).expect_err("should error"), - EvalAltResult::ErrorFunctionNotFound(fn_name, _) if fn_name == "calc" - )); - - Ok(()) -} - #[test] #[cfg(not(feature = "no_object"))] fn test_fn_ptr_raw() -> Result<(), Box> { @@ -179,33 +147,33 @@ fn test_fn_ptr_raw() -> Result<(), Box> { } #[test] -fn test_fn_ptr_curry_call() -> Result<(), Box> { - let mut module = Module::new(); +fn test_anonymous_fn() -> Result<(), Box> { + let calc_func = Func::<(INT, INT, INT), INT>::create_from_script( + Engine::new(), + "fn calc(x, y, z,) { (x + y) * z }", + "calc", + )?; - module.set_raw_fn( - "call_with_arg", - &[TypeId::of::(), TypeId::of::()], - |engine: &Engine, lib: &Module, args: &mut [&mut Dynamic]| { - let fn_ptr = std::mem::take(args[0]).cast::(); - fn_ptr.call_dynamic(engine, lib, None, [std::mem::take(args[1])]) - }, - ); + assert_eq!(calc_func(42, 123, 9)?, 1485); - let mut engine = Engine::new(); - engine.load_package(module.into()); + let calc_func = Func::<(INT, String, INT), INT>::create_from_script( + Engine::new(), + "fn calc(x, y, z) { (x + len(y)) * z }", + "calc", + )?; - #[cfg(not(feature = "no_object"))] - assert_eq!( - 
engine.eval::( - r#" - let addition = |x, y| { x + y }; - let curried = addition.curry(2); + assert_eq!(calc_func(42, "hello".to_string(), 9)?, 423); - call_with_arg(curried, 40) - "# - )?, - 42 - ); + let calc_func = Func::<(INT, INT, INT), INT>::create_from_script( + Engine::new(), + "private fn calc(x, y, z) { (x + y) * z }", + "calc", + )?; + + assert!(matches!( + *calc_func(42, 123, 9).expect_err("should error"), + EvalAltResult::ErrorFunctionNotFound(fn_name, _) if fn_name == "calc" + )); Ok(()) } diff --git a/tests/closures.rs b/tests/closures.rs new file mode 100644 index 00000000..3aec4865 --- /dev/null +++ b/tests/closures.rs @@ -0,0 +1,60 @@ +#![cfg(not(feature = "no_function"))] +use rhai::{Dynamic, Engine, EvalAltResult, FnPtr, Module, INT}; +use std::any::TypeId; + +#[test] +fn test_fn_ptr_curry_call() -> Result<(), Box> { + let mut module = Module::new(); + + module.set_raw_fn( + "call_with_arg", + &[TypeId::of::(), TypeId::of::()], + |engine: &Engine, lib: &Module, args: &mut [&mut Dynamic]| { + let fn_ptr = std::mem::take(args[0]).cast::(); + fn_ptr.call_dynamic(engine, lib, None, [std::mem::take(args[1])]) + }, + ); + + let mut engine = Engine::new(); + engine.load_package(module.into()); + + #[cfg(not(feature = "no_object"))] + assert_eq!( + engine.eval::( + r#" + let addition = |x, y| { x + y }; + let curried = addition.curry(2); + + call_with_arg(curried, 40) + "# + )?, + 42 + ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_capture"))] +fn test_closures() -> Result<(), Box> { + let engine = Engine::new(); + + assert_eq!( + engine.eval::( + r#" + let x = 8; + + let res = |y, z| { + let w = 12; + + return (|| x + y + z + w).call(); + }.curry(15).call(2); + + res + (|| x - 3).call() + "# + )?, + 42 + ); + + Ok(()) +}