From 527577895242ff4e453543bc40c54ff24633a68e Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Fri, 12 Jun 2020 18:46:36 +0800 Subject: [PATCH 1/6] Add test to call_fn with String parameter. --- README.md | 2 +- tests/call_fn.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 99ce9018..2b70b36d 100644 --- a/README.md +++ b/README.md @@ -316,7 +316,7 @@ Functions declared with `private` are hidden and cannot be called from Rust (see // Define functions in a script. let ast = engine.compile(true, r#" - // a function with two parameters: String and i64 + // a function with two parameters: string and i64 fn hello(x, y) { x.len + y } diff --git a/tests/call_fn.rs b/tests/call_fn.rs index 0bed8d7b..29f7abca 100644 --- a/tests/call_fn.rs +++ b/tests/call_fn.rs @@ -89,6 +89,14 @@ fn test_anonymous_fn() -> Result<(), Box> { assert_eq!(calc_func(42, 123, 9)?, 1485); + let calc_func = Func::<(INT, String, INT), INT>::create_from_script( + Engine::new(), + "fn calc(x, y, z) { (x + len(y)) * z }", + "calc", + )?; + + assert_eq!(calc_func(42, "hello".to_string(), 9)?, 423); + let calc_func = Func::<(INT, INT, INT), INT>::create_from_script( Engine::new(), "private fn calc(x, y, z) { (x + y) * z }", From b24fdd7a4d9b356c5d545065340ecf681a5311f2 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Fri, 12 Jun 2020 19:54:55 +0800 Subject: [PATCH 2/6] Change call_fn_dynamic to accept any type that is IntoIterator. 
--- README.md | 26 ++++++++++++-------------- RELEASES.md | 1 + src/api.rs | 34 +++++++++++++++++++++++----------- src/utils.rs | 9 +++++++++ 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 2b70b36d..7ad9c478 100644 --- a/README.md +++ b/README.md @@ -359,16 +359,14 @@ let result: i64 = engine.call_fn(&mut scope, &ast, "hello", () )?; let result: () = engine.call_fn(&mut scope, &ast, "hidden", ())?; ``` -For more control, construct all arguments as `Dynamic` values and use `Engine::call_fn_dynamic`: +For more control, construct all arguments as `Dynamic` values and use `Engine::call_fn_dynamic`, passing it +anything that implements `IntoIterator` (such as a simple `Vec`): ```rust let result: Dynamic = engine.call_fn_dynamic(&mut scope, &ast, "hello", - &mut [ String::from("abc").into(), 123_i64.into() ])?; + vec![ String::from("abc").into(), 123_i64.into() ])?; ``` -However, beware that `Engine::call_fn_dynamic` _consumes_ its arguments, meaning that all arguments passed to it -will be replaced by `()` afterwards. To re-use the arguments, clone them beforehand and pass in the clone. - ### Creating Rust anonymous functions from Rhai script [`Func`]: #creating-rust-anonymous-functions-from-rhai-script @@ -738,7 +736,7 @@ use rhai::Dynamic; let x = (42_i64).into(); // 'into()' works for standard types -let y = Dynamic::from(String::from("hello!")); // remember &str is not supported by Rhai +let y = Dynamic::from("hello!".to_string()); // remember &str is not supported by Rhai ``` Functions registered with the [`Engine`] can be _overloaded_ as long as the _signature_ is unique, @@ -1124,14 +1122,14 @@ not available under [`no_index`]. 
To use custom types for `print` and `debug`, or convert its value into a [string], it is necessary that the following functions be registered (assuming the custom type is `T : Display + Debug`): -| Function | Signature | Typical implementation | Usage | -| ----------- | ------------------------------------------------ | ------------------------------ | --------------------------------------------------------------------------------------- | -| `to_string` | `|s: &mut T| -> String` | `s.to_string()` | Converts the custom type into a [string] | -| `print` | `|s: &mut T| -> String` | `s.to_string()` | Converts the custom type into a [string] for the [`print`](#print-and-debug) statement | -| `debug` | `|s: &mut T| -> String` | `format!("{:?}", s)` | Converts the custom type into a [string] for the [`debug`](#print-and-debug) statement | -| `+` | `|s1: ImmutableString, s: T| -> ImmutableString` | `s1 + s` | Append the custom type to another [string], for `print("Answer: " + type);` usage | -| `+` | `|s: T, s2: ImmutableString| -> String` | `s.to_string().push_str(&s2);` | Append another [string] to the custom type, for `print(type + " is the answer");` usage | -| `+=` | `|s1: &mut ImmutableString, s: T|` | `s1 += s.to_string()` | Append the custom type to an existing [string], for `s += type;` usage | +| Function | Signature | Typical implementation | Usage | +| ----------- | ------------------------------------------------ | ------------------------------------- | --------------------------------------------------------------------------------------- | +| `to_string` | `|s: &mut T| -> ImmutableString` | `s.to_string().into()` | Converts the custom type into a [string] | +| `print` | `|s: &mut T| -> ImmutableString` | `s.to_string().into()` | Converts the custom type into a [string] for the [`print`](#print-and-debug) statement | +| `debug` | `|s: &mut T| -> ImmutableString` | `format!("{:?}", s).into()` | Converts the custom type into a [string] for the 
[`debug`](#print-and-debug) statement | +| `+` | `|s1: ImmutableString, s: T| -> ImmutableString` | `s1 + s` | Append the custom type to another [string], for `print("Answer: " + type);` usage | +| `+` | `|s: T, s2: ImmutableString| -> ImmutableString` | `s.to_string().push_str(&s2).into();` | Append another [string] to the custom type, for `print(type + " is the answer");` usage | +| `+=` | `|s1: &mut ImmutableString, s: T|` | `s1 += s.to_string()` | Append the custom type to an existing [string], for `s += type;` usage | `Scope` - Initializing and maintaining state ------------------------------------------- diff --git a/RELEASES.md b/RELEASES.md index 0b3136c1..6667774c 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -14,6 +14,7 @@ Breaking changes * `Engine::register_indexer` is renamed to `Engine::register_indexer_get`. * `Module::set_indexer_fn` is renamed to `Module::set_indexer_get_fn`. * The tuple `ParseError` now exposes the internal fields and the `ParseError::error_type` and `ParseError::position` methods are removed. The first tuple field is the `ParseErrorType` and the second tuple field is the `Position`. +* `Engine::call_fn_dynamic` now takes any type that implements `IntoIterator`. New features ------------ diff --git a/src/api.rs b/src/api.rs index df92f891..8e2af013 100644 --- a/src/api.rs +++ b/src/api.rs @@ -1114,7 +1114,7 @@ impl Engine { args: A, ) -> Result> { let mut arg_values = args.into_vec(); - let result = self.call_fn_dynamic(scope, ast, name, arg_values.as_mut())?; + let result = self.call_fn_dynamic_raw(scope, ast, name, arg_values.as_mut())?; let return_type = self.map_type_name(result.type_name()); @@ -1128,13 +1128,6 @@ impl Engine { /// Call a script function defined in an `AST` with multiple `Dynamic` arguments. /// - /// ## WARNING - /// - /// All the arguments are _consumed_, meaning that they're replaced by `()`. - /// This is to avoid unnecessarily cloning the arguments. - /// Do you use the arguments after this call. 
If you need them afterwards, - /// clone them _before_ calling this function. - /// /// # Example /// /// ``` @@ -1155,13 +1148,13 @@ impl Engine { /// scope.push("foo", 42_i64); /// /// // Call the script-defined function - /// let result = engine.call_fn_dynamic(&mut scope, &ast, "add", &mut [ String::from("abc").into(), 123_i64.into() ])?; + /// let result = engine.call_fn_dynamic(&mut scope, &ast, "add", vec![ String::from("abc").into(), 123_i64.into() ])?; /// assert_eq!(result.cast::(), 168); /// - /// let result = engine.call_fn_dynamic(&mut scope, &ast, "add1", &mut [ String::from("abc").into() ])?; + /// let result = engine.call_fn_dynamic(&mut scope, &ast, "add1", vec![ String::from("abc").into() ])?; /// assert_eq!(result.cast::(), 46); /// - /// let result= engine.call_fn_dynamic(&mut scope, &ast, "bar", &mut [])?; + /// let result= engine.call_fn_dynamic(&mut scope, &ast, "bar", vec![])?; /// assert_eq!(result.cast::(), 21); /// # } /// # Ok(()) @@ -1169,6 +1162,25 @@ impl Engine { /// ``` #[cfg(not(feature = "no_function"))] pub fn call_fn_dynamic( + &self, + scope: &mut Scope, + ast: &AST, + name: &str, + arg_values: impl IntoIterator, + ) -> Result> { + let mut arg_values: StaticVec<_> = arg_values.into_iter().collect(); + self.call_fn_dynamic_raw(scope, ast, name, arg_values.as_mut()) + } + + /// Call a script function defined in an `AST` with multiple `Dynamic` arguments. + /// + /// ## WARNING + /// + /// All the arguments are _consumed_, meaning that they're replaced by `()`. + /// This is to avoid unnecessarily cloning the arguments. + /// Do not use the arguments after this call. If they are needed afterwards, + /// clone them _before_ calling this function. 
+ pub(crate) fn call_fn_dynamic_raw( &self, scope: &mut Scope, ast: &AST, diff --git a/src/utils.rs b/src/utils.rs index bedf73b2..77cef0e7 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -216,6 +216,15 @@ impl FromIterator for StaticVec { } } +impl IntoIterator for StaticVec { + type Item = T; + type IntoIter = Box>; + + fn into_iter(self) -> Self::IntoIter { + self.into_iter() + } +} + impl StaticVec { /// Create a new `StaticVec`. #[inline(always)] From f00457559094a1513a112aa5f97024c1a0670cd4 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sat, 13 Jun 2020 17:03:49 +0800 Subject: [PATCH 3/6] Fix namespace error in no_std. --- src/error.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index d75af495..dd1be4ef 100644 --- a/src/error.rs +++ b/src/error.rs @@ -155,7 +155,7 @@ impl ParseErrorType { } impl fmt::Display for ParseErrorType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::BadInput(s) | ParseErrorType::MalformedCallExpr(s) => { write!(f, "{}", if s.is_empty() { self.desc() } else { s }) From 954f971ddf059ae3871a75139b65c40b4ceaa959 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sat, 13 Jun 2020 21:57:46 +0800 Subject: [PATCH 4/6] New optimizer test. 
--- tests/optimizer.rs | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/tests/optimizer.rs b/tests/optimizer.rs index 49b15c25..828b0da4 100644 --- a/tests/optimizer.rs +++ b/tests/optimizer.rs @@ -3,7 +3,7 @@ use rhai::{Engine, EvalAltResult, OptimizationLevel, INT}; #[test] -fn test_optimizer() -> Result<(), Box> { +fn test_optimizer_run() -> Result<(), Box> { fn run_test(engine: &mut Engine) -> Result<(), Box> { assert_eq!(engine.eval::(r"if true { 42 } else { 123 }")?, 42); assert_eq!( @@ -30,3 +30,27 @@ fn test_optimizer() -> Result<(), Box> { Ok(()) } + +#[test] +fn test_optimizer_parse() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_optimization_level(OptimizationLevel::Simple); + + let ast = engine.compile("{ const DECISION = false; if DECISION { 42 } }")?; + + assert_eq!( + format!("{:?}", ast), + "AST([], )" + ); + + engine.set_optimization_level(OptimizationLevel::Full); + + let ast = engine.compile("if 1 == 2 { 42 }")?; + + assert_eq!( + format!("{:?}", ast), + "AST([], )" + ); + + Ok(()) +} From b690ebac69ec3ddb1562e7f8629ec6158d151a05 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sat, 13 Jun 2020 21:57:57 +0800 Subject: [PATCH 5/6] Refine section on progress tracking. --- README.md | 48 ++++++++++++++++++++++++++++-------------------- _config.yml | 1 - 2 files changed, 28 insertions(+), 21 deletions(-) delete mode 100644 _config.yml diff --git a/README.md b/README.md index 7ad9c478..57c56afa 100644 --- a/README.md +++ b/README.md @@ -26,11 +26,11 @@ Features * Re-entrant scripting [`Engine`] can be made `Send + Sync` (via the [`sync`] feature). * Sand-boxed - the scripting [`Engine`], if declared immutable, cannot mutate the containing environment unless explicitly permitted (e.g. via a `RefCell`). * Rugged (protection against [stack-overflow](#maximum-call-stack-depth) and [runaway scripts](#maximum-number-of-operations) etc.). 
-* Track script evaluation [progress](#tracking-progress) and manually terminate a script run. +* Track script evaluation [progress](#tracking-progress-and-force-terminate-script-run) and manually terminate a script run. * [`no-std`](#optional-features) support. * [Function overloading](#function-overloading). * [Operator overloading](#operator-overloading). -* Organize code base with dynamically-loadable [Modules]. +* Organize code base with dynamically-loadable [modules]. * Scripts are [optimized](#script-optimization) (useful for template-based machine-generated scripts) for repeated evaluations. * Support for [minimal builds](#minimal-builds) by excluding unneeded language [features](#optional-features). * Very few additional dependencies (right now only [`num-traits`](https://crates.io/crates/num-traits/) @@ -2434,8 +2434,8 @@ engine.set_module_resolver(None); Ruggedization - protect against DoS attacks ------------------------------------------ -For scripting systems open to user-land scripts, it is always best to limit the amount of resources used by a script -so that it does not consume more resources that it is allowed to. +For scripting systems open to untrusted user-land scripts, it is always best to limit the amount of resources used by +a script so that it does not consume more resources that it is allowed to. The most important resources to watch out for are: @@ -2476,38 +2476,46 @@ A good rule-of-thumb is that one simple non-trivial expression consumes on avera One _operation_ can take an unspecified amount of time and real CPU cycles, depending on the particulars. For example, loading a constant consumes very few CPU cycles, while calling an external Rust function, though also counted as only one operation, may consume much more computing resources. -If it helps to visualize, think of an _operation_ as roughly equals to one _instruction_ of a hypothetical CPU. 
+To help visualize, think of an _operation_ as roughly equal to one _instruction_ of a hypothetical CPU +which includes _specialized_ instructions, such as _function call_, _load module_ etc., each taking up +one CPU cycle to execute. -The _operation count_ is intended to be a very course-grained measurement of the amount of CPU that a script -is consuming, and allows the system to impose a hard upper limit. +The _operations count_ is intended to be a very coarse-grained measurement of the amount of CPU that a script +has consumed, allowing the system to impose a hard upper limit on computing resources. -A script exceeding the maximum operations count will terminate with an error result. -This check can be disabled via the [`unchecked`] feature for higher performance -(but higher risks as well). +A script exceeding the maximum operations count terminates with an error result. +This can be disabled via the [`unchecked`] feature for higher performance (but higher risks as well). -### Tracking progress +### Tracking progress and force-terminate script run -To track script evaluation progress and to force-terminate a script prematurely (for any reason), -provide a closure to the `Engine::on_progress` method: +It is impossible to know when, or even whether, a script run will end +(a.k.a. the [Halting Problem](http://en.wikipedia.org/wiki/Halting_problem)).
+When dealing with third-party untrusted scripts that may be malicious, to track evaluation progress and +to force-terminate a script prematurely (for any reason), provide a closure to the `Engine::on_progress` method: ```rust let mut engine = Engine::new(); -engine.on_progress(|&count| { // 'count' is the number of operations performed +engine.on_progress(|&count| { // parameter is '&u64' - number of operations already performed if count % 1000 == 0 { println!("{}", count); // print out a progress log every 1,000 operations } - true // return 'true' to continue the script - // returning 'false' will terminate the script + true // return 'true' to continue running the script + // return 'false' to immediately terminate the script }); ``` -The closure passed to `Engine::on_progress` will be called once every operation. +The closure passed to `Engine::on_progress` will be called once for every operation. Return `false` to terminate the script immediately. +Notice that the _operations count_ value passed into the closure does not indicate the _percentage_ of work +already done by the script (and thus it is not real _progress_ tracking), because it is impossible to determine +how long a script may run. It is possible, however, to calculate this percentage based on an estimated +total number of operations for a typical run. + ### Maximum number of modules -Rhai by default does not limit how many [modules] are loaded via the [`import`] statement. +Rhai by default does not limit how many [modules] can be loaded via [`import`] statements. This can be changed via the `Engine::set_max_modules` method, with zero being unlimited (the default). ```rust @@ -2528,7 +2536,7 @@ Rhai by default limits function calls to a maximum depth of 128 levels (16 level This limit may be changed via the `Engine::set_max_call_levels` method. When setting this limit, care must be also taken to the evaluation depth of each _statement_ -within the function. 
It is entirely possible for a malicous script to embed an recursive call deep +within the function. It is entirely possible for a malicous script to embed a recursive call deep inside a nested expression or statement block (see [maximum statement depth](#maximum-statement-depth)). The limit can be disabled via the [`unchecked`] feature for higher performance @@ -2627,7 +2635,7 @@ For example, in the following: ```rust { - let x = 999; // NOT eliminated: Rhai doesn't check yet whether a variable is used later on + let x = 999; // NOT eliminated: variable may be used later on (perhaps even an 'eval') 123; // eliminated: no effect "hello"; // eliminated: no effect [1, 2, x, x*2, 5]; // eliminated: no effect diff --git a/_config.yml b/_config.yml deleted file mode 100644 index c7418817..00000000 --- a/_config.yml +++ /dev/null @@ -1 +0,0 @@ -theme: jekyll-theme-slate \ No newline at end of file From 22d30c95c9c80844ffc471f9b4479cea5154c5e3 Mon Sep 17 00:00:00 2001 From: Stephen Chung Date: Sun, 14 Jun 2020 00:09:16 +0800 Subject: [PATCH 6/6] Add maximum data size limits. --- README.md | 86 ++++++++++++++-- RELEASES.md | 1 + src/api.rs | 10 +- src/engine.rs | 97 +++++++++++++++++- src/error.rs | 17 +++- src/parser.rs | 244 ++++++++++++++++++++++++++------------------- src/result.rs | 8 ++ src/token.rs | 58 +++++++---- tests/data_size.rs | 135 +++++++++++++++++++++++++ 9 files changed, 518 insertions(+), 138 deletions(-) create mode 100644 tests/data_size.rs diff --git a/README.md b/README.md index 57c56afa..3ab88d26 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ to add scripting to any application. Features -------- -* Easy-to-use language similar to JS+Rust with dynamic typing but _no_ garbage collector. +* Easy-to-use language similar to JS+Rust with dynamic typing. 
* Tight integration with native Rust [functions](#working-with-functions) and [types](#custom-types-and-methods), including [getters/setters](#getters-and-setters), [methods](#members-and-methods) and [indexers](#indexers). * Freely pass Rust variables/constants into a script via an external [`Scope`]. @@ -25,7 +25,7 @@ Features one single source file, all with names starting with `"unsafe_"`). * Re-entrant scripting [`Engine`] can be made `Send + Sync` (via the [`sync`] feature). * Sand-boxed - the scripting [`Engine`], if declared immutable, cannot mutate the containing environment unless explicitly permitted (e.g. via a `RefCell`). -* Rugged (protection against [stack-overflow](#maximum-call-stack-depth) and [runaway scripts](#maximum-number-of-operations) etc.). +* Rugged - protection against malicious attacks (such as [stack-overflow](#maximum-call-stack-depth), [over-sized data](#maximum-length-of-strings), and [runaway scripts](#maximum-number-of-operations) etc.) that may come from untrusted third-party user-land scripts. * Track script evaluation [progress](#tracking-progress-and-force-terminate-script-run) and manually terminate a script run. * [`no-std`](#optional-features) support. * [Function overloading](#function-overloading). @@ -1191,13 +1191,16 @@ fn main() -> Result<(), Box> Engine configuration options --------------------------- -| Method | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | -| `set_optimization_level` | Set the amount of script _optimizations_ performed. See [script optimization]. | -| `set_max_expr_depths` | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). | -| `set_max_call_levels` | Set the maximum number of function call levels (default 50) to avoid infinite recursion. 
See [maximum call stack depth](#maximum-call-stack-depth). | -| `set_max_operations` | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | -| `set_max_modules` | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| Method | Not available under | Description | +| ------------------------ | ---------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------- | +| `set_optimization_level` | [`no_optimize`] | Set the amount of script _optimizations_ performed. See [script optimization]. | +| `set_max_expr_depths` | [`unchecked`] | Set the maximum nesting levels of an expression/statement. See [maximum statement depth](#maximum-statement-depth). | +| `set_max_call_levels` | [`unchecked`] | Set the maximum number of function call levels (default 50) to avoid infinite recursion. See [maximum call stack depth](#maximum-call-stack-depth). | +| `set_max_operations` | [`unchecked`] | Set the maximum number of _operations_ that a script is allowed to consume. See [maximum number of operations](#maximum-number-of-operations). | +| `set_max_modules` | [`unchecked`] | Set the maximum number of [modules] that a script is allowed to load. See [maximum number of modules](#maximum-number-of-modules). | +| `set_max_string_size` | [`unchecked`] | Set the maximum length (in UTF-8 bytes) for [strings]. See [maximum length of strings](#maximum-length-of-strings). | +| `set_max_array_size` | [`unchecked`], [`no_index`] | Set the maximum size for [arrays]. See [maximum size of arrays](#maximum-size-of-arrays). | +| `set_max_map_size` | [`unchecked`], [`no_object`] | Set the maximum number of properties for [object maps]. See [maximum size of object maps](#maximum-size-of-object-maps). 
| ------- @@ -1498,6 +1501,9 @@ record == "Bob X. Davis: age 42 ❤\n"; 'C' in record == false; ``` +The maximum allowed length of a string can be controlled via `Engine::set_max_string_size` +(see [maximum length of strings](#maximum-length-of-strings)). + ### Built-in functions The following standard methods (mostly defined in the [`MoreStringPackage`](#packages) but excluded if using a [raw `Engine`]) operate on strings: @@ -1673,6 +1679,9 @@ y.len == 0; engine.register_fn("push", |list: &mut Array, item: MyType| list.push(Box::new(item)) ); ``` +The maximum allowed size of an array can be controlled via `Engine::set_max_array_size` +(see [maximum size of arrays](#maximum-size-of-arrays)). + Object maps ----------- @@ -1776,6 +1785,9 @@ y.clear(); // empty the object map y.len() == 0; ``` +The maximum allowed size of an object map can be controlled via `Engine::set_max_map_size` +(see [maximum size of object maps](#maximum-size-of-object-maps)). + ### Parsing from JSON The syntax for an object map is extremely similar to JSON, with the exception of `null` values which can @@ -2439,7 +2451,7 @@ a script so that it does not consume more resources that it is allowed to. The most important resources to watch out for are: -* **Memory**: A malicous script may continuously grow an [array] or [object map] until all memory is consumed. +* **Memory**: A malicous script may continuously grow a [string], an [array] or [object map] until all memory is consumed. It may also create a large [array] or [object map] literal that exhausts all memory during parsing. * **CPU**: A malicous script may run an infinite tight loop that consumes all CPU cycles. * **Time**: A malicous script may run indefinitely, thereby blocking the calling system which is waiting for a result. @@ -2455,6 +2467,60 @@ The most important resources to watch out for are: * **Data**: A malicous script may attempt to read from and/or write to data that it does not own. 
If this happens, it is a severe security breach and may put the entire system at risk. +### Maximum length of strings + +Rhai by default does not limit how long a [string] can be. +This can be changed via the `Engine::set_max_string_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_string_size(500); // allow strings only up to 500 bytes long (in UTF-8 format) + +engine.set_max_string_size(0); // allow unlimited string length +``` + +A script attempting to create a string literal longer than the maximum will terminate with a parse error. +Any script operation that produces a string longer than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +### Maximum size of arrays + +Rhai by default does not limit how large an [array] can be. +This can be changed via the `Engine::set_max_array_size` method, with zero being unlimited (the default). + +```rust +let mut engine = Engine::new(); + +engine.set_max_array_size(500); // allow arrays only up to 500 items + +engine.set_max_array_size(0); // allow unlimited arrays +``` + +A script attempting to create an array literal larger than the maximum will terminate with a parse error. +Any script operation that produces an array larger than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + +### Maximum size of object maps + +Rhai by default does not limit how large (i.e. the number of properties) an [object map] can be. +This can be changed via the `Engine::set_max_map_size` method, with zero being unlimited (the default). 
+ +```rust +let mut engine = Engine::new(); + +engine.set_max_map_size(500); // allow object maps with only up to 500 properties + +engine.set_max_map_size(0); // allow unlimited object maps +``` + +A script attempting to create an object map literal with more properties than the maximum will terminate with a parse error. +Any script operation that produces an object map with more properties than the maximum also terminates the script with an error result. +This check can be disabled via the [`unchecked`] feature for higher performance +(but higher risks as well). + ### Maximum number of operations Rhai by default does not limit how much time or CPU a script consumes. diff --git a/RELEASES.md b/RELEASES.md index 6667774c..b19caf44 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -21,6 +21,7 @@ New features * Indexers are now split into getters and setters (which now support updates). The API is split into `Engine::register_indexer_get` and `Engine::register_indexer_set` with `Engine::register_indexer_get_set` being a shorthand. Similarly, `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn` are added. * `Engine:register_fn` and `Engine:register_result_fn` accepts functions that take parameters of type `&str` (immutable string slice), which maps directly to `ImmutableString`. This is to avoid needing wrappers for functions taking string parameters. +* Set maximum limit on data sizes: `Engine::set_max_string_size`, `Engine::set_max_array_size` and `Engine::set_max_map_size`. 
Version 0.15.0 diff --git a/src/api.rs b/src/api.rs index 8e2af013..56adc08c 100644 --- a/src/api.rs +++ b/src/api.rs @@ -547,7 +547,7 @@ impl Engine { scripts: &[&str], optimization_level: OptimizationLevel, ) -> Result { - let stream = lex(scripts); + let stream = lex(scripts, self.max_string_size); self.parse(&mut stream.peekable(), scope, optimization_level) } @@ -669,7 +669,7 @@ impl Engine { // Trims the JSON string and add a '#' in front let scripts = ["#", json.trim()]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr(&mut stream.peekable(), &scope, OptimizationLevel::None)?; @@ -750,7 +750,7 @@ impl Engine { script: &str, ) -> Result { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); { let mut peekable = stream.peekable(); @@ -904,7 +904,7 @@ impl Engine { script: &str, ) -> Result> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse_global_expr( &mut stream.peekable(), @@ -1034,7 +1034,7 @@ impl Engine { script: &str, ) -> Result<(), Box> { let scripts = [script]; - let stream = lex(&scripts); + let stream = lex(&scripts, self.max_string_size); let ast = self.parse(&mut stream.peekable(), scope, self.optimization_level)?; self.consume_ast_with_scope(scope, &ast) diff --git a/src/engine.rs b/src/engine.rs index b3d0536d..2ef39ee1 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -67,6 +67,13 @@ pub const MAX_EXPR_DEPTH: usize = usize::MAX; #[cfg(feature = "unchecked")] pub const MAX_FUNCTION_EXPR_DEPTH: usize = usize::MAX; +#[cfg(feature = "unchecked")] +pub const MAX_STRING_SIZE: usize = usize::MAX; +#[cfg(feature = "unchecked")] +pub const MAX_ARRAY_SIZE: usize = usize::MAX; +#[cfg(feature = "unchecked")] +pub const MAX_MAP_SIZE: usize = usize::MAX; + pub const KEYWORD_PRINT: &str = "print"; pub const KEYWORD_DEBUG: &str = "debug"; pub const KEYWORD_TYPE_OF: 
&str = "type_of"; @@ -262,6 +269,12 @@ pub struct Engine { pub(crate) max_operations: u64, /// Maximum number of modules allowed to load. pub(crate) max_modules: u64, + /// Maximum length of a string. + pub(crate) max_string_size: usize, + /// Maximum length of an array. + pub(crate) max_array_size: usize, + /// Maximum number of properties in a map. + pub(crate) max_map_size: usize, } impl Default for Engine { @@ -298,6 +311,9 @@ impl Default for Engine { max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, max_operations: u64::MAX, max_modules: u64::MAX, + max_string_size: usize::MAX, + max_array_size: usize::MAX, + max_map_size: usize::MAX, }; engine.load_package(StandardPackage::new().get()); @@ -442,6 +458,9 @@ impl Engine { max_function_expr_depth: MAX_FUNCTION_EXPR_DEPTH, max_operations: u64::MAX, max_modules: u64::MAX, + max_string_size: usize::MAX, + max_array_size: usize::MAX, + max_map_size: usize::MAX, } } @@ -495,13 +514,33 @@ impl Engine { self.max_modules = if modules == 0 { u64::MAX } else { modules }; } - /// Set the depth limits for expressions/statements. + /// Set the depth limits for expressions/statements (0 for unlimited). #[cfg(not(feature = "unchecked"))] pub fn set_max_expr_depths(&mut self, max_expr_depth: usize, max_function_expr_depth: usize) { self.max_expr_depth = max_expr_depth; self.max_function_expr_depth = max_function_expr_depth; } + /// Set the maximum length of strings (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + pub fn set_max_string_size(&mut self, max_size: usize) { + self.max_string_size = max_size; + } + + /// Set the maximum length of arrays (0 for unlimited). + #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_index"))] + pub fn set_max_array_size(&mut self, max_size: usize) { + self.max_array_size = max_size; + } + + /// Set the maximum length of object maps (0 for unlimited). 
+ #[cfg(not(feature = "unchecked"))] + #[cfg(not(feature = "no_object"))] + pub fn set_max_map_size(&mut self, max_size: usize) { + self.max_map_size = max_size; + } + /// Set the module resolution service used by the `Engine`. /// /// Not available under the `no_module` feature. @@ -1395,7 +1434,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, expr.position()))?; - match expr { + let result = match expr { Expr::Expr(x) => self.eval_expr(scope, state, lib, x.as_ref(), level), Expr::IntegerConstant(x) => Ok(x.0.into()), @@ -1731,7 +1770,13 @@ impl Engine { Expr::Unit(_) => Ok(().into()), _ => unreachable!(), + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; } + + result } /// Evaluate a statement @@ -1746,7 +1791,7 @@ impl Engine { self.inc_operations(state) .map_err(|err| EvalAltResult::new_position(err, stmt.position()))?; - match stmt { + let result = match stmt { // No-op Stmt::Noop(_) => Ok(Default::default()), @@ -1998,6 +2043,52 @@ impl Engine { } Ok(Default::default()) } + }; + + if let Ok(val) = &result { + self.check_data_size(val)?; + } + + result + } + + /// Check a `Dynamic` value to ensure that its size is within allowable limit. 
+ fn check_data_size(&self, value: &Dynamic) -> Result<(), Box> { + #[cfg(feature = "unchecked")] + return Ok(()); + + match value { + Dynamic(Union::Str(s)) + if self.max_string_size > 0 && s.len() > self.max_string_size => + { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of string".to_string(), + self.max_string_size, + s.len(), + Position::none(), + ))) + } + #[cfg(not(feature = "no_index"))] + Dynamic(Union::Array(arr)) + if self.max_array_size > 0 && arr.len() > self.max_array_size => + { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Length of array".to_string(), + self.max_array_size, + arr.len(), + Position::none(), + ))) + } + #[cfg(not(feature = "no_object"))] + Dynamic(Union::Map(map)) if self.max_map_size > 0 && map.len() > self.max_map_size => { + Err(Box::new(EvalAltResult::ErrorDataTooLarge( + "Number of properties in object map".to_string(), + self.max_map_size, + map.len(), + Position::none(), + ))) + } + _ => Ok(()), } } diff --git a/src/error.rs b/src/error.rs index dd1be4ef..3d4c47c7 100644 --- a/src/error.rs +++ b/src/error.rs @@ -12,6 +12,8 @@ pub enum LexError { UnexpectedChar(char), /// A string literal is not terminated before a new-line or EOF. UnterminatedString, + /// A string literal exceeds the maximum length. Wrapped value is the maximum length. + StringTooLong(usize), /// An string/character/numeric escape sequence is in an invalid format. MalformedEscapeSequence(String), /// An numeric literal is in an invalid format. @@ -35,6 +37,11 @@ impl fmt::Display for LexError { Self::MalformedChar(s) => write!(f, "Invalid character: '{}'", s), Self::MalformedIdentifier(s) => write!(f, "Variable name is not proper: '{}'", s), Self::UnterminatedString => write!(f, "Open string is not terminated"), + Self::StringTooLong(max) => write!( + f, + "Length of string literal exceeds the maximum limit ({})", + max + ), Self::ImproperKeyword(s) => write!(f, "{}", s), } } @@ -109,12 +116,16 @@ pub enum ParseErrorType { WrongExport, /// Assignment to a copy of a value.
AssignmentToCopy, - /// Assignment to an a constant variable. + /// Assignment to a constant variable. Wrapped value is the constant variable name. AssignmentToConstant(String), /// Expression exceeding the maximum levels of complexity. /// /// Never appears under the `unchecked` feature. ExprTooDeep, + /// Literal exceeding the maximum size. Wrapped values are the data type name and the maximum size. + /// + /// Never appears under the `unchecked` feature. + LiteralTooLarge(String, usize), /// Break statement not inside a loop. LoopBreak, } @@ -149,6 +160,7 @@ impl ParseErrorType { Self::AssignmentToCopy => "Only a copy of the value is change with this assignment", Self::AssignmentToConstant(_) => "Cannot assign to a constant value", Self::ExprTooDeep => "Expression exceeds maximum complexity", + Self::LiteralTooLarge(_, _) => "Literal exceeds maximum limit", Self::LoopBreak => "Break statement should only be used inside a loop" } } @@ -197,6 +209,9 @@ impl fmt::Display for ParseErrorType { Self::AssignmentToConstant(s) if s.is_empty() => write!(f, "{}", self.desc()), Self::AssignmentToConstant(s) => write!(f, "Cannot assign to constant '{}'", s), + Self::LiteralTooLarge(typ, max) => { + write!(f, "{} exceeds the maximum limit ({})", typ, max) + } _ => write!(f, "{}", self.desc()), } } diff --git a/src/parser.rs b/src/parser.rs index 2eed69b1..d17657e0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -201,13 +201,27 @@ struct ParseState { stack: Vec<(String, ScopeEntryType)>, /// Maximum levels of expression nesting. max_expr_depth: usize, + /// Maximum length of a string. + pub max_string_size: usize, + /// Maximum length of an array. + pub max_array_size: usize, + /// Maximum number of properties in a map. + pub max_map_size: usize, } impl ParseState { /// Create a new `ParseState`.
- pub fn new(max_expr_depth: usize) -> Self { + pub fn new( + max_expr_depth: usize, + max_string_size: usize, + max_array_size: usize, + max_map_size: usize, + ) -> Self { Self { max_expr_depth, + max_string_size, + max_array_size, + max_map_size, ..Default::default() } } @@ -1070,6 +1084,14 @@ fn parse_array_literal( if !match_token(input, Token::RightBracket)? { while !input.peek().unwrap().0.is_eof() { + if state.max_array_size > 0 && arr.len() >= state.max_array_size { + return Err(PERR::LiteralTooLarge( + "Size of array literal".to_string(), + state.max_array_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; arr.push(expr); @@ -1155,8 +1177,15 @@ fn parse_map_literal( } }; - let expr = parse_expr(input, state, settings.level_up())?; + if state.max_map_size > 0 && map.len() >= state.max_map_size { + return Err(PERR::LiteralTooLarge( + "Number of properties in object map literal".to_string(), + state.max_map_size, + ) + .into_err(input.peek().unwrap().1)); + } + let expr = parse_expr(input, state, settings.level_up())?; map.push(((name, pos), expr)); match input.peek().unwrap() { @@ -2408,102 +2437,6 @@ fn parse_fn( }) } -/// Parse the global level statements. -fn parse_global_level( - input: &mut TokenStream, - max_expr_depth: usize, - max_function_expr_depth: usize, -) -> Result<(Vec, Vec), ParseError> { - let mut statements = Vec::::new(); - let mut functions = HashMap::::with_hasher(StraightHasherBuilder); - let mut state = ParseState::new(max_expr_depth); - - while !input.peek().unwrap().0.is_eof() { - // Collect all the function definitions - #[cfg(not(feature = "no_function"))] - { - let (access, must_be_fn) = if match_token(input, Token::Private)? 
{ - (FnAccess::Private, true) - } else { - (FnAccess::Public, false) - }; - - match input.peek().unwrap() { - #[cfg(not(feature = "no_function"))] - (Token::Fn, pos) => { - let mut state = ParseState::new(max_function_expr_depth); - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: false, - is_breakable: false, - level: 0, - pos: *pos, - }; - let func = parse_fn(input, &mut state, access, settings)?; - - // Qualifiers (none) + function name + number of arguments. - let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); - - functions.insert(hash, func); - continue; - } - (_, pos) if must_be_fn => { - return Err(PERR::MissingToken( - Token::Fn.into(), - format!("following '{}'", Token::Private.syntax()), - ) - .into_err(*pos)) - } - _ => (), - } - } - - // Actual statement - let settings = ParseSettings { - allow_if_expr: true, - allow_stmt_expr: true, - is_global: true, - is_breakable: false, - level: 0, - pos: Position::none(), - }; - let stmt = parse_stmt(input, &mut state, settings)?; - - let need_semicolon = !stmt.is_self_terminated(); - - statements.push(stmt); - - match input.peek().unwrap() { - // EOF - (Token::EOF, _) => break, - // stmt ; - (Token::SemiColon, _) if need_semicolon => { - eat_token(input, Token::SemiColon); - } - // stmt ; - (Token::SemiColon, _) if !need_semicolon => (), - // { stmt } ??? - (_, _) if !need_semicolon => (), - // stmt - (Token::LexError(err), pos) => { - return Err(PERR::BadInput(err.to_string()).into_err(*pos)) - } - // stmt ??? 
- (_, pos) => { - // Semicolons are not optional between statements - return Err(PERR::MissingToken( - Token::SemiColon.into(), - "to terminate this statement".into(), - ) - .into_err(*pos)); - } - } - } - - Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) -} - impl Engine { pub(crate) fn parse_global_expr( &self, @@ -2511,7 +2444,12 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let mut state = ParseState::new(self.max_expr_depth); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); let settings = ParseSettings { allow_if_expr: false, allow_stmt_expr: false, @@ -2540,6 +2478,111 @@ impl Engine { ) } + /// Parse the global level statements. + fn parse_global_level( + &self, + input: &mut TokenStream, + ) -> Result<(Vec, Vec), ParseError> { + let mut statements = Vec::::new(); + let mut functions = HashMap::::with_hasher(StraightHasherBuilder); + let mut state = ParseState::new( + self.max_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + + while !input.peek().unwrap().0.is_eof() { + // Collect all the function definitions + #[cfg(not(feature = "no_function"))] + { + let (access, must_be_fn) = if match_token(input, Token::Private)? { + (FnAccess::Private, true) + } else { + (FnAccess::Public, false) + }; + + match input.peek().unwrap() { + #[cfg(not(feature = "no_function"))] + (Token::Fn, pos) => { + let mut state = ParseState::new( + self.max_function_expr_depth, + self.max_string_size, + self.max_array_size, + self.max_map_size, + ); + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: false, + is_breakable: false, + level: 0, + pos: *pos, + }; + let func = parse_fn(input, &mut state, access, settings)?; + + // Qualifiers (none) + function name + number of arguments. 
+ let hash = calc_fn_hash(empty(), &func.name, func.params.len(), empty()); + + functions.insert(hash, func); + continue; + } + (_, pos) if must_be_fn => { + return Err(PERR::MissingToken( + Token::Fn.into(), + format!("following '{}'", Token::Private.syntax()), + ) + .into_err(*pos)) + } + _ => (), + } + } + + // Actual statement + let settings = ParseSettings { + allow_if_expr: true, + allow_stmt_expr: true, + is_global: true, + is_breakable: false, + level: 0, + pos: Position::none(), + }; + let stmt = parse_stmt(input, &mut state, settings)?; + + let need_semicolon = !stmt.is_self_terminated(); + + statements.push(stmt); + + match input.peek().unwrap() { + // EOF + (Token::EOF, _) => break, + // stmt ; + (Token::SemiColon, _) if need_semicolon => { + eat_token(input, Token::SemiColon); + } + // stmt ; + (Token::SemiColon, _) if !need_semicolon => (), + // { stmt } ??? + (_, _) if !need_semicolon => (), + // stmt + (Token::LexError(err), pos) => { + return Err(PERR::BadInput(err.to_string()).into_err(*pos)) + } + // stmt ??? + (_, pos) => { + // Semicolons are not optional between statements + return Err(PERR::MissingToken( + Token::SemiColon.into(), + "to terminate this statement".into(), + ) + .into_err(*pos)); + } + } + } + + Ok((statements, functions.into_iter().map(|(_, v)| v).collect())) + } + /// Run the parser on an input stream, returning an AST. pub(crate) fn parse( &self, @@ -2547,8 +2590,7 @@ impl Engine { scope: &Scope, optimization_level: OptimizationLevel, ) -> Result { - let (statements, lib) = - parse_global_level(input, self.max_expr_depth, self.max_function_expr_depth)?; + let (statements, lib) = self.parse_global_level(input)?; Ok( // Optimize AST diff --git a/src/result.rs b/src/result.rs index 36bc18f6..83943df2 100644 --- a/src/result.rs +++ b/src/result.rs @@ -81,6 +81,8 @@ pub enum EvalAltResult { ErrorTooManyModules(Position), /// Call stack over maximum limit. ErrorStackOverflow(Position), + /// Data value over maximum size limit. 
Wrapped values are the data type, maximum size and current size. + ErrorDataTooLarge(String, usize, usize, Position), /// The script is prematurely terminated. ErrorTerminated(Position), /// Run-time error encountered. Wrapped value is the error message. @@ -139,6 +141,7 @@ impl EvalAltResult { Self::ErrorTooManyOperations(_) => "Too many operations", Self::ErrorTooManyModules(_) => "Too many modules imported", Self::ErrorStackOverflow(_) => "Stack overflow", + Self::ErrorDataTooLarge(_, _, _, _) => "Data size exceeds maximum limit", Self::ErrorTerminated(_) => "Script terminated.", Self::ErrorRuntime(_, _) => "Runtime error", Self::ErrorLoopBreak(true, _) => "Break statement not inside a loop", @@ -228,6 +231,9 @@ impl fmt::Display for EvalAltResult { "String index {} is out of bounds: only {} characters in the string", index, max )?, + Self::ErrorDataTooLarge(typ, max, size, _) => { + write!(f, "{} ({}) exceeds the maximum limit ({})", typ, size, max)? + } } // Do not write any position if None @@ -279,6 +285,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) @@ -316,6 +323,7 @@ impl EvalAltResult { | Self::ErrorTooManyOperations(pos) | Self::ErrorTooManyModules(pos) | Self::ErrorStackOverflow(pos) + | Self::ErrorDataTooLarge(_, _, _, pos) | Self::ErrorTerminated(pos) | Self::ErrorRuntime(_, pos) | Self::ErrorLoopBreak(_, pos) diff --git a/src/token.rs b/src/token.rs index d6799a6f..614783b4 100644 --- a/src/token.rs +++ b/src/token.rs @@ -429,6 +429,8 @@ impl From for String { /// An iterator on a `Token` stream. pub struct TokenIterator<'a> { + /// Maximum length of a string (0 = unlimited). + max_string_size: usize, /// Can the next token be a unary operator? can_be_unary: bool, /// Current position. 
@@ -494,6 +496,7 @@ impl<'a> TokenIterator<'a> { pub fn parse_string_literal( &mut self, enclosing_char: char, + max_length: usize, ) -> Result { let mut result = Vec::new(); let mut escape = String::with_capacity(12); @@ -505,6 +508,10 @@ impl<'a> TokenIterator<'a> { self.advance(); + if max_length > 0 && result.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + match next_char { // \... '\\' if escape.is_empty() => { @@ -592,7 +599,13 @@ impl<'a> TokenIterator<'a> { } } - Ok(result.iter().collect()) + let s = result.iter().collect::(); + + if max_length > 0 && s.len() > max_length { + return Err((LexError::StringTooLong(max_length), self.pos)); + } + + Ok(s) } /// Get the next token. @@ -779,10 +792,12 @@ impl<'a> TokenIterator<'a> { // " - string literal ('"', _) => { - return self.parse_string_literal('"').map_or_else( - |err| Some((Token::LexError(Box::new(err.0)), err.1)), - |out| Some((Token::StringConst(out), pos)), - ); + return self + .parse_string_literal('"', self.max_string_size) + .map_or_else( + |err| Some((Token::LexError(Box::new(err.0)), err.1)), + |out| Some((Token::StringConst(out), pos)), + ); } // ' - character literal @@ -793,19 +808,25 @@ impl<'a> TokenIterator<'a> { )); } ('\'', _) => { - return Some(self.parse_string_literal('\'').map_or_else( - |err| (Token::LexError(Box::new(err.0)), err.1), - |result| { - let mut chars = result.chars(); - let first = chars.next(); + return Some( + self.parse_string_literal('\'', self.max_string_size) + .map_or_else( + |err| (Token::LexError(Box::new(err.0)), err.1), + |result| { + let mut chars = result.chars(); + let first = chars.next(); - if chars.next().is_some() { - (Token::LexError(Box::new(LERR::MalformedChar(result))), pos) - } else { - (Token::CharConstant(first.expect("should be Some")), pos) - } - }, - )); + if chars.next().is_some() { + ( + Token::LexError(Box::new(LERR::MalformedChar(result))), + pos, + ) + } else { + 
(Token::CharConstant(first.expect("should be Some")), pos) + } + }, + ), + ); } // Braces @@ -1047,8 +1068,9 @@ impl<'a> Iterator for TokenIterator<'a> { } /// Tokenize an input text stream. -pub fn lex<'a>(input: &'a [&'a str]) -> TokenIterator<'a> { +pub fn lex<'a>(input: &'a [&'a str], max_string_size: usize) -> TokenIterator<'a> { TokenIterator { + max_string_size, can_be_unary: true, pos: Position::new(1, 0), streams: input.iter().map(|s| s.chars().peekable()).collect(), diff --git a/tests/data_size.rs b/tests/data_size.rs new file mode 100644 index 00000000..637fde5e --- /dev/null +++ b/tests/data_size.rs @@ -0,0 +1,135 @@ +#![cfg(not(feature = "unchecked"))] +use rhai::{Engine, EvalAltResult, ParseError, ParseErrorType}; + +#[cfg(not(feature = "no_index"))] +use rhai::Array; + +#[cfg(not(feature = "no_object"))] +use rhai::Map; + +#[test] +fn test_max_string_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_string_size(10); + + assert!(matches!( + engine.compile(r#"let x = "hello, world!";"#).expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::BadInput("Length of string literal exceeds the maximum limit (10)".to_string()) + )); + + assert!(matches!( + *engine + .eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 13, _) + )); + + engine.set_max_string_size(0); + + assert_eq!( + engine.eval::( + r#" + let x = "hello, "; + let y = "world!"; + x + y + "# + )?, + "hello, world!" 
+ ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_index"))] +fn test_max_array_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_array_size(10); + + assert!(matches!( + engine + .compile("let x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Size of array literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_array_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = [1,2,3,4,5,6]; + let y = [7,8,9,10,11,12]; + x + y + " + )? + .len(), + 12 + ); + + Ok(()) +} + +#[test] +#[cfg(not(feature = "no_object"))] +fn test_max_map_size() -> Result<(), Box> { + let mut engine = Engine::new(); + engine.set_max_map_size(10); + + assert!(matches!( + engine + .compile("let x = #{a:1,b:2,c:3,d:4,e:5,f:6,g:7,h:8,i:9,j:10,k:11,l:12,m:13,n:14,o:15};") + .expect_err("should error"), + ParseError(x, _) if *x == ParseErrorType::LiteralTooLarge("Number of properties in object map literal".to_string(), 10) + )); + + assert!(matches!( + *engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + ) + .expect_err("should error"), + EvalAltResult::ErrorDataTooLarge(_, 10, 12, _) + )); + + engine.set_max_map_size(0); + + assert_eq!( + engine + .eval::( + r" + let x = #{a:1,b:2,c:3,d:4,e:5,f:6}; + let y = #{g:7,h:8,i:9,j:10,k:11,l:12}; + x + y + " + )? + .len(), + 12 + ); + + Ok(()) +}