diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 49318e1c..1370e03e 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -29,6 +29,7 @@ jobs:
- "--features no_object"
- "--features no_function"
- "--features no_module"
+ - "--features unicode-xid-ident"
toolchain: [stable]
experimental: [false]
include:
diff --git a/Cargo.toml b/Cargo.toml
index 2f2ab525..f049777e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,7 +14,7 @@ include = [
"Cargo.toml"
]
keywords = [ "scripting" ]
-categories = [ "no-std", "embedded", "parser-implementations" ]
+categories = [ "no-std", "embedded", "wasm", "parser-implementations" ]
[dependencies]
num-traits = { version = "0.2.11", default-features = false }
@@ -34,6 +34,7 @@ no_object = [] # no custom objects
no_function = [] # no script-defined functions
no_module = [] # no modules
internals = [] # expose internal data structures
+unicode-xid-ident = ["unicode-xid"] # allow identifiers as defined by Unicode Standard Annex #31
# compiling for no-std
no_std = [ "num-traits/libm", "hashbrown", "core-error", "libm", "ahash" ]
@@ -73,6 +74,11 @@ default_features = false
features = ["derive", "alloc"]
optional = true
+[dependencies.unicode-xid]
+version = "0.2.1"
+default_features = false
+optional = true
+
[target.'cfg(target_arch = "wasm32")'.dependencies]
instant= { version = "0.1.4", features = ["wasm-bindgen"] } # WASM implementation of std::time::Instant
diff --git a/RELEASES.md b/RELEASES.md
index bd4623c0..7f59cddf 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -20,6 +20,7 @@ New features
* Custom syntax now works even without the `internals` feature.
* Currying of function pointers is supported via the new `curry` keyword.
* `Module::set_indexer_get_set_fn` is added as a shorthand of both `Module::set_indexer_get_fn` and `Module::set_indexer_set_fn`.
+* New `unicode-xid-ident` feature to allow Unicode identifiers as defined by [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/).
Breaking changes
----------------
diff --git a/doc/src/language/variables.md b/doc/src/language/variables.md
index be4abbce..455c075b 100644
--- a/doc/src/language/variables.md
+++ b/doc/src/language/variables.md
@@ -21,6 +21,11 @@ Variable names are case _sensitive_.
Variable names also cannot be the same as a [keyword].
+### Unicode Standard Annex #31 Identifiers
+
+The [`unicode-xid-ident`] feature expands the allowed characters for variable names to the set defined by
+[Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/).
+
Declare a Variable
------------------
diff --git a/doc/src/links.md b/doc/src/links.md
index 889cee29..936c714d 100644
--- a/doc/src/links.md
+++ b/doc/src/links.md
@@ -12,6 +12,7 @@
[`no_std`]: {{rootUrl}}/start/features.md
[`no-std`]: {{rootUrl}}/start/features.md
[`internals`]: {{rootUrl}}/start/features.md
+[`unicode-xid-ident`]: {{rootUrl}}/start/features.md
[minimal builds]: {{rootUrl}}/start/builds/minimal.md
[WASM]: {{rootUrl}}/start/builds/wasm.md
diff --git a/doc/src/start/features.md b/doc/src/start/features.md
index 0d31d077..0f6b0f53 100644
--- a/doc/src/start/features.md
+++ b/doc/src/start/features.md
@@ -11,21 +11,22 @@ Notice that this deviates from Rust norm where features are _additive_.
Excluding unneeded functionalities can result in smaller, faster builds as well as
more control over what a script can (or cannot) do.
-| Feature | Description |
-| ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `unchecked` | Disable arithmetic checking (such as over-flows and division by zero), call stack depth limit, operations count limit and modules loading limit.
Beware that a bad script may panic the entire system! |
-| `sync` | Restrict all values types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. |
-| `no_optimize` | Disable [script optimization]. |
-| `no_float` | Disable floating-point numbers and math. |
-| `only_i32` | Set the system integer type to `i32` and disable all other integer types. `INT` is set to `i32`. |
-| `only_i64` | Set the system integer type to `i64` and disable all other integer types. `INT` is set to `i64`. |
-| `no_index` | Disable [arrays] and indexing features. |
-| `no_object` | Disable support for [custom types] and [object maps]. |
-| `no_function` | Disable script-defined [functions]. |
-| `no_module` | Disable loading external [modules]. |
-| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
-| `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
-| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
+| Feature | Description |
+| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `unchecked` | Disable arithmetic checking (such as over-flows and division by zero), call stack depth limit, operations count limit and modules loading limit.
Beware that a bad script may panic the entire system! |
+| `sync` | Restrict all values types to those that are `Send + Sync`. Under this feature, all Rhai types, including [`Engine`], [`Scope`] and [`AST`], are all `Send + Sync`. |
+| `no_optimize` | Disable [script optimization]. |
+| `no_float` | Disable floating-point numbers and math. |
+| `only_i32` | Set the system integer type to `i32` and disable all other integer types. `INT` is set to `i32`. |
+| `only_i64` | Set the system integer type to `i64` and disable all other integer types. `INT` is set to `i64`. |
+| `no_index` | Disable [arrays] and indexing features. |
+| `no_object` | Disable support for [custom types] and [object maps]. |
+| `no_function` | Disable script-defined [functions]. |
+| `no_module` | Disable loading external [modules]. |
+| `no_std` | Build for `no-std`. Notice that additional dependencies will be pulled in to replace `std` features. |
+| `serde` | Enable serialization/deserialization via `serde`. Notice that the [`serde`](https://crates.io/crates/serde) crate will be pulled in together with its dependencies. |
+| `internals` | Expose internal data structures (e.g. [`AST`] nodes). Beware that Rhai internals are volatile and may change from version to version. |
+| `unicode-xid-ident` | Allow identifiers as defined by [Unicode Standard Annex #31](http://www.unicode.org/reports/tr31/).                                                                                                        |
Example
diff --git a/src/fn_call.rs b/src/fn_call.rs
index ccdd14d5..96e81580 100644
--- a/src/fn_call.rs
+++ b/src/fn_call.rs
@@ -931,8 +931,8 @@ pub fn run_builtin_binary_op(
}
if args_type == TypeId::of::() {
- let x = *x.downcast_ref::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let x = x.clone().cast::();
+ let y = y.clone().cast::();
#[cfg(not(feature = "unchecked"))]
match op {
@@ -973,8 +973,8 @@ pub fn run_builtin_binary_op(
_ => (),
}
} else if args_type == TypeId::of::() {
- let x = *x.downcast_ref::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let x = x.clone().cast::();
+ let y = y.clone().cast::();
match op {
"&" => return Ok(Some((x && y).into())),
@@ -999,8 +999,8 @@ pub fn run_builtin_binary_op(
_ => (),
}
} else if args_type == TypeId::of::() {
- let x = *x.downcast_ref::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let x = x.clone().cast::();
+ let y = y.clone().cast::();
match op {
"==" => return Ok(Some((x == y).into())),
@@ -1021,8 +1021,8 @@ pub fn run_builtin_binary_op(
#[cfg(not(feature = "no_float"))]
if args_type == TypeId::of::() {
- let x = *x.downcast_ref::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let x = x.clone().cast::();
+ let y = y.clone().cast::();
match op {
"+" => return Ok(Some((x + y).into())),
@@ -1060,7 +1060,7 @@ pub fn run_builtin_op_assignment(
if args_type == TypeId::of::() {
let x = x.downcast_mut::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let y = y.clone().cast::();
#[cfg(not(feature = "unchecked"))]
match op {
@@ -1096,7 +1096,7 @@ pub fn run_builtin_op_assignment(
}
} else if args_type == TypeId::of::() {
let x = x.downcast_mut::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let y = y.clone().cast::();
match op {
"&=" => return Ok(Some(*x = *x && y)),
@@ -1116,7 +1116,7 @@ pub fn run_builtin_op_assignment(
#[cfg(not(feature = "no_float"))]
if args_type == TypeId::of::() {
let x = x.downcast_mut::().unwrap();
- let y = *y.downcast_ref::().unwrap();
+ let y = y.clone().cast::();
match op {
"+=" => return Ok(Some(*x += y)),
diff --git a/src/fn_native.rs b/src/fn_native.rs
index 6cd90321..116c62ab 100644
--- a/src/fn_native.rs
+++ b/src/fn_native.rs
@@ -300,11 +300,9 @@ impl CallableFunction {
/// Get the access mode.
pub fn access(&self) -> FnAccess {
match self {
- CallableFunction::Plugin(_) => FnAccess::Public,
- CallableFunction::Pure(_)
- | CallableFunction::Method(_)
- | CallableFunction::Iterator(_) => FnAccess::Public,
- CallableFunction::Script(f) => f.access,
+ Self::Plugin(_) => FnAccess::Public,
+ Self::Pure(_) | Self::Method(_) | Self::Iterator(_) => FnAccess::Public,
+ Self::Script(f) => f.access,
}
}
/// Get a reference to a native Rust function.
diff --git a/src/packages/string_more.rs b/src/packages/string_more.rs
index 62a5fdb2..6ddc2b0c 100644
--- a/src/packages/string_more.rs
+++ b/src/packages/string_more.rs
@@ -15,7 +15,7 @@ use crate::stdlib::{
any::TypeId,
boxed::Box,
fmt::Display,
- format,
+ format, mem,
string::{String, ToString},
vec::Vec,
};
@@ -242,7 +242,7 @@ def_package!(crate:MoreStringPackage:"Additional string utilities, including str
}
if len > 0 {
- let ch = *args[2].downcast_ref::< char>().unwrap();
+ let ch = mem::take(args[2]).cast::();
let s = args[0].downcast_mut::().unwrap();
let orig_len = s.chars().count();
diff --git a/src/token.rs b/src/token.rs
index b8576133..77187304 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -30,8 +30,11 @@ pub type TokenStream<'a, 't> = Peekable>;
/// A location (line number + character position) in the input script.
///
-/// In order to keep footprint small, both line number and character position have 16-bit unsigned resolution,
-/// meaning they go up to a maximum of 65,535 lines and characters per line.
+/// # Limitations
+///
+/// In order to keep footprint small, both line number and character position have 16-bit resolution,
+/// meaning they go up to a maximum of 65,535 lines and 65,535 characters per line.
+///
/// Advancing beyond the maximum line length or maximum number of lines is not an error but has no effect.
#[derive(Eq, PartialEq, Ord, PartialOrd, Hash, Clone, Copy)]
pub struct Position {
@@ -43,6 +46,13 @@ pub struct Position {
impl Position {
/// Create a new `Position`.
+ ///
+ /// `line` must not be zero.
+ /// If `position` is zero, then it is at the beginning of a line.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `line` is zero.
pub fn new(line: u16, position: u16) -> Self {
assert!(line != 0, "line cannot be zero");
@@ -52,7 +62,7 @@ impl Position {
}
}
- /// Get the line number (1-based), or `None` if no position.
+ /// Get the line number (1-based), or `None` if there is no position.
pub fn line(&self) -> Option {
if self.is_none() {
None
@@ -85,7 +95,6 @@ impl Position {
/// # Panics
///
/// Panics if already at beginning of a line - cannot rewind to a previous line.
- ///
pub(crate) fn rewind(&mut self) {
assert!(!self.is_none(), "cannot rewind Position::none");
assert!(self.pos > 0, "cannot rewind at position 0");
@@ -104,7 +113,7 @@ impl Position {
}
/// Create a `Position` representing no position.
- pub(crate) fn none() -> Self {
+ pub fn none() -> Self {
Self { line: 0, pos: 0 }
}
@@ -146,9 +155,9 @@ impl fmt::Debug for Position {
pub enum Token {
/// An `INT` constant.
IntegerConstant(INT),
- /// A `FLOAT` constaint.
+ /// A `FLOAT` constant.
///
- /// Never appears under the `no_float` feature.
+ /// Reserved under the `no_float` feature.
#[cfg(not(feature = "no_float"))]
FloatConstant(FLOAT),
/// An identifier.
@@ -249,7 +258,7 @@ pub enum Token {
And,
/// `fn`
///
- /// Never appears under the `no_function` feature.
+ /// Reserved under the `no_function` feature.
#[cfg(not(feature = "no_function"))]
Fn,
/// `continue`
@@ -284,22 +293,22 @@ pub enum Token {
PowerOfAssign,
/// `private`
///
- /// Never appears under the `no_function` feature.
+ /// Reserved under the `no_function` feature.
#[cfg(not(feature = "no_function"))]
Private,
/// `import`
///
- /// Never appears under the `no_module` feature.
+ /// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
Import,
/// `export`
///
- /// Never appears under the `no_module` feature.
+ /// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
Export,
/// `as`
///
- /// Never appears under the `no_module` feature.
+ /// Reserved under the `no_module` feature.
#[cfg(not(feature = "no_module"))]
As,
/// A lexer error.
@@ -643,7 +652,7 @@ impl Token {
}
}
- /// Is this token a standard keyword?
+ /// Is this token an active standard keyword?
pub fn is_keyword(&self) -> bool {
use Token::*;
@@ -670,7 +679,7 @@ impl Token {
}
/// Convert a token into a function name, if possible.
- pub fn into_function_name(self) -> Result {
+ pub(crate) fn into_function_name(self) -> Result {
match self {
Self::Reserved(s) if is_keyword_function(&s) => Ok(s),
Self::Custom(s) | Self::Identifier(s) if is_valid_identifier(s.chars()) => Ok(s),
@@ -726,32 +735,6 @@ pub trait InputStream {
fn peek_next(&mut self) -> Option;
}
-pub fn is_keyword_function(name: &str) -> bool {
- name == KEYWORD_PRINT
- || name == KEYWORD_DEBUG
- || name == KEYWORD_TYPE_OF
- || name == KEYWORD_EVAL
- || name == KEYWORD_FN_PTR
- || name == KEYWORD_FN_PTR_CALL
- || name == KEYWORD_FN_PTR_CURRY
-}
-
-pub fn is_valid_identifier(name: impl Iterator- ) -> bool {
- let mut first_alphabetic = false;
-
- for ch in name {
- match ch {
- '_' => (),
- _ if char::is_ascii_alphabetic(&ch) => first_alphabetic = true,
- _ if !first_alphabetic => return false,
- _ if char::is_ascii_alphanumeric(&ch) => (),
- _ => return false,
- }
- }
-
- first_alphabetic
-}
-
/// [INTERNALS] Parse a string literal wrapped by `enclosing_char`.
/// Exported under the `internals` feature only.
///
@@ -1098,35 +1081,7 @@ fn get_next_token_inner(
// letter or underscore ...
('A'..='Z', _) | ('a'..='z', _) | ('_', _) => {
- let mut result = Vec::new();
- result.push(c);
-
- while let Some(next_char) = stream.peek_next() {
- match next_char {
- x if x.is_ascii_alphanumeric() || x == '_' => {
- result.push(x);
- eat_next(stream, pos);
- }
- _ => break,
- }
- }
-
- let is_valid_identifier = is_valid_identifier(result.iter().cloned());
-
- let identifier: String = result.into_iter().collect();
-
- if !is_valid_identifier {
- return Some((
- Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
- start_pos,
- ));
- }
-
- return Some((
- Token::lookup_from_syntax(&identifier)
- .unwrap_or_else(|| Token::Identifier(identifier)),
- start_pos,
- ));
+ return get_identifier(stream, pos, start_pos, c);
}
// " - string literal
@@ -1149,7 +1104,7 @@ fn get_next_token_inner(
|err| (Token::LexError(Box::new(err.0)), err.1),
|result| {
let mut chars = result.chars();
- let first = chars.next();
+ let first = chars.next().unwrap();
if chars.next().is_some() {
(
@@ -1157,10 +1112,7 @@ fn get_next_token_inner(
start_pos,
)
} else {
- (
- Token::CharConstant(first.expect("should be Some")),
- start_pos,
- )
+ (Token::CharConstant(first), start_pos)
}
},
))
@@ -1404,6 +1356,10 @@ fn get_next_token_inner(
('\0', _) => unreachable!(),
(ch, _) if ch.is_whitespace() => (),
+ #[cfg(feature = "unicode-xid-ident")]
+ (ch, _) if unicode_xid::UnicodeXID::is_xid_start(ch) => {
+ return get_identifier(stream, pos, start_pos, c);
+ }
(ch, _) => {
return Some((
Token::LexError(Box::new(LERR::UnexpectedInput(ch.to_string()))),
@@ -1422,6 +1378,95 @@ fn get_next_token_inner(
}
}
+/// Lex an identifier (or keyword) starting at `start_pos`.
+///
+/// `first_char` is supplied by the caller and is not read from `stream`; every following
+/// character is peeked from `stream` and consumed via `eat_next` for as long as
+/// `is_id_continue` accepts it.
+///
+/// Returns `Token::LexError(MalformedIdentifier)` when the collected text fails
+/// `is_valid_identifier`. Otherwise returns whatever `Token::lookup_from_syntax` yields for
+/// the text, falling back to `Token::Identifier`. The returned position is always `start_pos`.
+fn get_identifier(
+    stream: &mut impl InputStream,
+    pos: &mut Position,
+    start_pos: Position,
+    first_char: char,
+) -> Option<(Token, Position)> {
+    // Build the text directly as a `String`; no intermediate `Vec<char>` is needed.
+    let mut identifier = String::new();
+    identifier.push(first_char);
+
+    while let Some(next_char) = stream.peek_next() {
+        if !is_id_continue(next_char) {
+            break;
+        }
+        identifier.push(next_char);
+        eat_next(stream, pos);
+    }
+
+    if !is_valid_identifier(identifier.chars()) {
+        return Some((
+            Token::LexError(Box::new(LERR::MalformedIdentifier(identifier))),
+            start_pos,
+        ));
+    }
+
+    Some((
+        Token::lookup_from_syntax(&identifier).unwrap_or_else(|| Token::Identifier(identifier)),
+        start_pos,
+    ))
+}
+
+/// Returns `true` when `name` equals one of the keyword constants listed below,
+/// i.e. a keyword that may also be used as a function name.
+#[inline(always)]
+pub fn is_keyword_function(name: &str) -> bool {
+    [
+        KEYWORD_PRINT,
+        KEYWORD_DEBUG,
+        KEYWORD_TYPE_OF,
+        KEYWORD_EVAL,
+        KEYWORD_FN_PTR,
+        KEYWORD_FN_PTR_CALL,
+        KEYWORD_FN_PTR_CURRY,
+    ]
+    .iter()
+    .any(|&keyword| name == keyword)
+}
+
+/// Is `name` a well-formed identifier?
+///
+/// Underscores are accepted anywhere. The first non-underscore character must satisfy
+/// `is_id_first_alphabetic`, and every later character must satisfy `is_id_continue`.
+/// Returns `false` for an empty name or one made up only of underscores.
+pub fn is_valid_identifier(name: impl Iterator<Item = char>) -> bool {
+    let mut first_alphabetic = false;
+
+    for ch in name {
+        match ch {
+            '_' => (),
+            _ if is_id_first_alphabetic(ch) => first_alphabetic = true,
+            _ if !first_alphabetic => return false,
+            // Use the same continue-set as the lexer. An ASCII-only check here would reject
+            // characters that `is_id_continue` accepts under the `unicode-xid-ident` feature.
+            _ if is_id_continue(ch) => (),
+            _ => return false,
+        }
+    }
+
+    first_alphabetic
+}
+
+/// Can `x` act as the first (non-underscore) character of an identifier?
+///
+/// With the `unicode-xid-ident` feature this defers to `UnicodeXID::is_xid_start`;
+/// without it only ASCII letters qualify.
+#[inline(always)]
+fn is_id_first_alphabetic(x: char) -> bool {
+    #[cfg(feature = "unicode-xid-ident")]
+    return unicode_xid::UnicodeXID::is_xid_start(x);
+
+    #[cfg(not(feature = "unicode-xid-ident"))]
+    return x.is_ascii_alphabetic();
+}
+
+/// Can `x` appear after the first character of an identifier?
+///
+/// With the `unicode-xid-ident` feature this defers to `UnicodeXID::is_xid_continue`;
+/// without it only ASCII letters, ASCII digits and `_` qualify.
+#[inline(always)]
+fn is_id_continue(x: char) -> bool {
+    #[cfg(feature = "unicode-xid-ident")]
+    return unicode_xid::UnicodeXID::is_xid_continue(x);
+
+    #[cfg(not(feature = "unicode-xid-ident"))]
+    return x.is_ascii_alphanumeric() || x == '_';
+}
+
/// A type that implements the `InputStream` trait.
/// Multiple character streams are jointed together to form one single stream.
pub struct MultiInputsStream<'a> {
diff --git a/tests/tokens.rs b/tests/tokens.rs
index 523beab7..e393663d 100644
--- a/tests/tokens.rs
+++ b/tests/tokens.rs
@@ -51,3 +51,29 @@ fn test_tokens_custom_operator() -> Result<(), Box> {
Ok(())
}
+
+// Checks identifier lexing with and without the `unicode-xid-ident` feature:
+// a script function with a non-ASCII name must evaluate to 42 when the feature is on and
+// must fail when it is off; a name made of an underscore and a digit must fail either way.
+#[test]
+fn test_tokens_unicode_xid_ident() -> Result<(), Box> {
+ let engine = Engine::new();
+ // Case 1: define and call a function whose name is entirely non-ASCII.
+ let result = engine.eval::(
+ r"
+ fn すべての答え() { 42 }
+ すべての答え()
+ ",
+ );
+ // Feature on: the script is expected to run and return 42.
+ #[cfg(feature = "unicode-xid-ident")]
+ assert_eq!(result?, 42);
+
+ // Feature off: evaluation is expected to return an error.
+ #[cfg(not(feature = "unicode-xid-ident"))]
+ assert!(result.is_err());
+
+ // Case 2: `_1` has no alphabetic character, so evaluation must fail in both configurations.
+ let result = engine.eval::(
+ r"
+ fn _1() { 1 }
+ _1()
+ ",
+ );
+ assert!(result.is_err());
+
+ Ok(())
+}