diff --git a/src/engine_api.rs b/src/engine_api.rs
index 3b4cdee9..403080a2 100644
--- a/src/engine_api.rs
+++ b/src/engine_api.rs
@@ -9,7 +9,6 @@ use crate::parser::ParseState;
 use crate::stdlib::{
     any::{type_name, TypeId},
     boxed::Box,
-    num::NonZeroUsize,
     string::String,
 };
 use crate::{
@@ -1158,16 +1157,8 @@ impl Engine {
         scripts: &[&str],
         optimization_level: OptimizationLevel,
     ) -> Result<AST, ParseError> {
-        let (stream, buffer) = self.lex_raw(scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);
         self.parse(
             &mut stream.peekable(),
             &mut state,
@@ -1347,7 +1338,7 @@ impl Engine {
                 .into());
         };
 
-        let (stream, buffer) = self.lex_raw(
+        let (stream, tokenizer_control) = self.lex_raw(
             &scripts,
             Some(if has_null {
                 |token| match token {
@@ -1360,15 +1351,7 @@ impl Engine {
             }),
         );
 
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let mut state = ParseState::new(self, tokenizer_control);
 
         let ast = self.parse_global_expr(
             &mut stream.peekable(),
@@ -1454,18 +1437,10 @@ impl Engine {
         script: &str,
     ) -> Result<AST, ParseError> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
         let mut peekable = stream.peekable();
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let mut state = ParseState::new(self, tokenizer_control);
         self.parse_global_expr(&mut peekable, &mut state, scope, self.optimization_level)
     }
 
     /// Evaluate a script file.
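Note: every `engine_api.rs` call site now collapses to the same two-line pattern, because `ParseState::new` (see the `parser.rs` hunks below) reads the expression-depth limits off the `Engine` itself instead of taking them as `cfg`-gated parameters. A minimal self-contained sketch of that constructor change, using illustrative stand-in types rather than rhai's real definitions:

```rust
use std::num::NonZeroUsize;

// Illustrative stand-ins; only the shape of the constructor matters.
struct Engine {
    max_expr_depth: usize,
}

struct ParseState<'e> {
    engine: &'e Engine,
    // `None` means "no limit", which is why the limit is an Option.
    max_expr_depth: Option<NonZeroUsize>,
}

impl<'e> ParseState<'e> {
    // After the change: the constructor derives the limit from the engine,
    // so call sites no longer repeat the cfg-gated NonZeroUsize dance.
    fn new(engine: &'e Engine) -> Self {
        Self {
            engine,
            max_expr_depth: NonZeroUsize::new(engine.max_expr_depth),
        }
    }
}

fn main() {
    let engine = Engine { max_expr_depth: 64 };
    let state = ParseState::new(&engine);
    assert!(std::ptr::eq(state.engine, &engine));
    assert_eq!(state.max_expr_depth, NonZeroUsize::new(64));
}
```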
@@ -1624,16 +1599,8 @@ impl Engine {
         script: &str,
     ) -> Result<T, Box<EvalAltResult>> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);
 
         // No need to optimize a lone expression
         let ast = self.parse_global_expr(
@@ -1779,16 +1746,8 @@ impl Engine {
         script: &str,
     ) -> Result<(), Box<EvalAltResult>> {
         let scripts = [script];
-        let (stream, buffer) = self.lex_raw(&scripts, None);
-        let mut state = ParseState::new(
-            self,
-            buffer,
-            #[cfg(not(feature = "unchecked"))]
-            NonZeroUsize::new(self.max_expr_depth()),
-            #[cfg(not(feature = "unchecked"))]
-            #[cfg(not(feature = "no_function"))]
-            NonZeroUsize::new(self.max_function_expr_depth()),
-        );
+        let (stream, tokenizer_control) = self.lex_raw(&scripts, None);
+        let mut state = ParseState::new(self, tokenizer_control);
 
         let ast = self.parse(
             &mut stream.peekable(),
diff --git a/src/parser.rs b/src/parser.rs
index 96482ca6..25622144 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -11,7 +11,6 @@ use crate::optimize::optimize_into_ast;
 use crate::optimize::OptimizationLevel;
 use crate::stdlib::{
     boxed::Box,
-    cell::Cell,
     collections::BTreeMap,
     format,
     hash::{Hash, Hasher},
@@ -22,7 +21,9 @@ use crate::stdlib::{
     vec::Vec,
 };
 use crate::syntax::{CustomSyntax, MARKER_BLOCK, MARKER_EXPR, MARKER_IDENT};
-use crate::token::{is_keyword_function, is_valid_identifier, Token, TokenStream};
+use crate::token::{
+    is_keyword_function, is_valid_identifier, Token, TokenStream, TokenizerControl,
+};
 use crate::utils::{get_hasher, IdentifierBuilder};
 use crate::{
     calc_fn_hash, Dynamic, Engine, Identifier, LexError, ParseError, ParseErrorType, Position,
@@ -45,7 +46,7 @@ pub struct ParseState<'e> {
     /// Reference to the scripting [`Engine`].
     engine: &'e Engine,
     /// Input stream buffer containing the next character to read.
-    buffer: Shared<Cell<Option<char>>>,
+    tokenizer_control: TokenizerControl,
     /// Interned strings.
     interned_strings: IdentifierBuilder,
     /// Encapsulates a local stack with variable names to simulate an actual runtime scope.
@@ -76,22 +77,15 @@ pub struct ParseState<'e> {
 impl<'e> ParseState<'e> {
     /// Create a new [`ParseState`].
     #[inline(always)]
-    pub fn new(
-        engine: &'e Engine,
-        buffer: Shared<Cell<Option<char>>>,
-        #[cfg(not(feature = "unchecked"))] max_expr_depth: Option<NonZeroUsize>,
-        #[cfg(not(feature = "unchecked"))]
-        #[cfg(not(feature = "no_function"))]
-        max_function_expr_depth: Option<NonZeroUsize>,
-    ) -> Self {
+    pub fn new(engine: &'e Engine, tokenizer_control: TokenizerControl) -> Self {
         Self {
             engine,
-            buffer,
+            tokenizer_control,
             #[cfg(not(feature = "unchecked"))]
-            max_expr_depth,
+            max_expr_depth: NonZeroUsize::new(engine.max_expr_depth()),
             #[cfg(not(feature = "unchecked"))]
             #[cfg(not(feature = "no_function"))]
-            max_function_expr_depth,
+            max_function_expr_depth: NonZeroUsize::new(engine.max_function_expr_depth()),
             #[cfg(not(feature = "no_closure"))]
             external_vars: Default::default(),
             #[cfg(not(feature = "no_closure"))]
@@ -982,14 +976,8 @@ fn parse_primary(
         // | ...
         #[cfg(not(feature = "no_function"))]
         Token::Pipe | Token::Or if settings.allow_anonymous_fn => {
-            let mut new_state = ParseState::new(
-                state.engine,
-                state.buffer.clone(),
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-            );
+            let mut new_state = ParseState::new(state.engine, state.tokenizer_control.clone());
+            new_state.max_expr_depth = new_state.max_function_expr_depth;
 
             let settings = ParseSettings {
                 allow_if_expr: true,
@@ -1034,7 +1022,9 @@ fn parse_primary(
             segments.push(expr);
 
             // Make sure to parse the following as text
-            state.buffer.set(Some('`'));
+            let mut control = state.tokenizer_control.get();
+            control.is_within_text = true;
+            state.tokenizer_control.set(control);
 
             match input.next().unwrap() {
                 (Token::StringConstant(s), pos) => {
@@ -2540,14 +2530,9 @@ fn parse_stmt(
 
     match input.next().unwrap() {
         (Token::Fn, pos) => {
-            let mut new_state = ParseState::new(
-                state.engine,
-                state.buffer.clone(),
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-                #[cfg(not(feature = "unchecked"))]
-                state.max_function_expr_depth,
-            );
+            let mut new_state =
+                ParseState::new(state.engine, state.tokenizer_control.clone());
+            new_state.max_expr_depth = new_state.max_function_expr_depth;
 
             let settings = ParseSettings {
                 allow_if_expr: true,
diff --git a/src/token.rs b/src/token.rs
index 3a64bd84..7dab498c 100644
--- a/src/token.rs
+++ b/src/token.rs
@@ -11,10 +11,11 @@ use crate::stdlib::{
     iter::{FusedIterator, Peekable},
     num::NonZeroUsize,
     ops::{Add, AddAssign},
+    rc::Rc,
     str::{Chars, FromStr},
     string::{String, ToString},
 };
-use crate::{Engine, LexError, Shared, StaticVec, INT};
+use crate::{Engine, LexError, StaticVec, INT};
 
 #[cfg(not(feature = "no_float"))]
 use crate::ast::FloatWrapper;
@@ -25,6 +26,17 @@ use rust_decimal::Decimal;
 #[cfg(not(feature = "no_function"))]
 use crate::engine::KEYWORD_IS_DEF_FN;
 
+/// A type containing commands to control the tokenizer.
+#[derive(Debug, Clone, Eq, PartialEq, Hash, Copy, Default)]
+pub struct TokenizeControlBlock {
+    /// Is the current tokenizer position within an interpolated text string?
+    /// This flag allows switching the tokenizer back to _text_ parsing after an interpolation stream.
+    pub is_within_text: bool,
+}
+
+/// A shared object that allows control of the tokenizer from outside.
+pub type TokenizerControl = Rc<Cell<TokenizeControlBlock>>;
+
 type LERR = LexError;
 
 /// Separator character for numbers.
@@ -849,6 +861,9 @@ pub trait InputStream {
 /// _(INTERNALS)_ Parse a string literal ended by `termination_char`.
 /// Exported under the `internals` feature only.
 ///
+/// Returns the parsed string and a boolean indicating whether the string is
+/// terminated by an interpolation `${`.
+///
 /// # Volatile API
 ///
 /// This function is volatile and may change.
@@ -1840,8 +1855,8 @@ pub struct TokenIterator<'a> {
     state: TokenizeState,
     /// Current position.
     pos: Position,
-    /// Buffer containing the next character to read, if any.
-    buffer: Shared<Cell<Option<char>>>,
+    /// External buffer containing the next character to read, if any.
+    tokenizer_control: TokenizerControl,
     /// Input character stream.
     stream: MultiInputsStream<'a>,
     /// A processor function that maps a token to another.
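Note: the hunks above are the heart of the change. `TokenizeControlBlock` is plain `Copy` data inside an `Rc<Cell<...>>`, so the parser and the tokenizer share one handle with no borrowing conflicts: each side `get`s a copy, mutates it, and `set`s it back. A self-contained sketch of that handshake (the struct mirrors the diff; the driver code is illustrative only):

```rust
use std::cell::Cell;
use std::rc::Rc;

// Mirrors the diff: plain-old-data, so it can live in a `Cell` and be
// copied in and out by value.
#[derive(Debug, Clone, Copy, Default)]
struct TokenizeControlBlock {
    is_within_text: bool,
}

type TokenizerControl = Rc<Cell<TokenizeControlBlock>>;

fn main() {
    // One handle stays with the tokenizer; a clone goes to the parser
    // (this is what `lex_raw` does with `buffer` / `buffer2`).
    let tokenizer_side: TokenizerControl = Default::default();
    let parser_side = tokenizer_side.clone();

    // Parser side, as in parse_primary: after consuming a `${ ... }`
    // expression, ask the tokenizer to resume text mode.
    let mut control = parser_side.get();
    control.is_within_text = true;
    parser_side.set(control);

    // Tokenizer side, as in TokenIterator::next: see the flag, act on it,
    // then reset it.
    let mut control = tokenizer_side.get();
    if control.is_within_text {
        control.is_within_text = false;
        tokenizer_side.set(control);
        println!("switching back to text parsing");
    }
    assert!(!tokenizer_side.get().is_within_text);
}
```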
@@ -1852,9 +1867,16 @@ impl<'a> Iterator for TokenIterator<'a> {
     type Item = (Token, Position);
 
     fn next(&mut self) -> Option<Self::Item> {
-        if let Some(ch) = self.buffer.take() {
-            self.stream.unget(ch);
+        let mut control = self.tokenizer_control.get();
+
+        if control.is_within_text {
+            // Push a back-tick into the stream
+            self.stream.unget('`');
+            // Rewind the current position by one character
             self.pos.rewind();
+            // Reset it
+            control.is_within_text = false;
+            self.tokenizer_control.set(control);
         }
 
         let (token, pos) = match get_next_token(&mut self.stream, &mut self.state, &mut self.pos) {
@@ -1945,7 +1967,7 @@ impl Engine {
     pub fn lex<'a>(
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
+    ) -> (TokenIterator<'a>, TokenizerControl) {
         self.lex_raw(input, None)
     }
     /// _(INTERNALS)_ Tokenize an input text stream with a mapping function.
@@ -1956,7 +1978,7 @@ impl Engine {
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
         map: fn(Token) -> Token,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
+    ) -> (TokenIterator<'a>, TokenizerControl) {
         self.lex_raw(input, Some(map))
     }
     /// Tokenize an input text stream with an optional mapping function.
@@ -1965,8 +1987,8 @@ impl Engine {
         &'a self,
         input: impl IntoIterator<Item = &'a &'a str>,
         map: Option<fn(Token) -> Token>,
-    ) -> (TokenIterator<'a>, Shared<Cell<Option<char>>>) {
+    ) -> (TokenIterator<'a>, TokenizerControl) {
-        let buffer: Shared<Cell<Option<char>>> = Cell::new(None).into();
+        let buffer: TokenizerControl = Default::default();
         let buffer2 = buffer.clone();
 
         (
@@ -1984,7 +2006,7 @@ impl Engine {
                 disable_doc_comments: self.disable_doc_comments,
             },
             pos: Position::new(1, 0),
-            buffer,
+            tokenizer_control: buffer,
             stream: MultiInputsStream {
                 buf: None,
                 streams: input.into_iter().map(|s| s.chars().peekable()).collect(),
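Note on the `TokenIterator::next` hunk: instead of replaying an arbitrary buffered character, the tokenizer now re-injects a back-tick and rewinds its position by one, so the text after an interpolation is lexed exactly as if a fresh back-tick string had just opened. A toy model of that unget-and-rewind step (assumed names, not rhai's real internals):

```rust
// Toy character stream with a one-character pushback buffer, modeling
// the unget call made by TokenIterator::next on its input stream.
struct Stream {
    pushback: Option<char>,
    chars: Vec<char>,
    cursor: usize,
}

impl Stream {
    fn unget(&mut self, ch: char) {
        self.pushback = Some(ch);
    }

    fn next_char(&mut self) -> Option<char> {
        if let Some(ch) = self.pushback.take() {
            return Some(ch);
        }
        let ch = self.chars.get(self.cursor).copied();
        if ch.is_some() {
            self.cursor += 1;
        }
        ch
    }
}

fn main() {
    // Input remaining after the parser consumed a `${ ... }` expression:
    // the tail of the interpolated string plus its closing back-tick.
    let mut stream = Stream {
        pushback: None,
        chars: ", world`".chars().collect(),
        cursor: 0,
    };

    // The re-entry step: inject the back-tick that `${` swallowed, so the
    // string lexer sees a normal opening delimiter again.
    stream.unget('`');

    assert_eq!(stream.next_char(), Some('`')); // re-injected delimiter
    assert_eq!(stream.next_char(), Some(',')); // then the real tail text
}
```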
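Taken together, the three files make string interpolation resume correctly after an embedded expression, including inside functions and closures parsed with a cloned `ParseState`. An end-to-end check of the observable behavior (assumes a rhai build containing this change, with default features):

```rust
use rhai::Engine;

fn main() -> Result<(), Box<rhai::EvalAltResult>> {
    let engine = Engine::new();

    // Back-tick strings interpolate `${ ... }` expressions; the text after
    // each interpolation must be picked up again as string content.
    let result: String = engine.eval(
        r#"
            let name = "world";
            `hello ${name}, ${40 + 2} is the answer`
        "#,
    )?;

    assert_eq!(result, "hello world, 42 is the answer");
    Ok(())
}
```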