Add custom syntax.

This commit is contained in:
Stephen Chung
2020-07-09 19:54:28 +08:00
parent e33760a7d4
commit 99164ebceb
10 changed files with 483 additions and 132 deletions

View File

@@ -312,6 +312,87 @@ impl Token {
}
}
/// Reverse lookup a token from a piece of syntax.
pub fn lookup_from_syntax(syntax: &str) -> Option<Self> {
use Token::*;
Some(match syntax {
"{" => LeftBrace,
"}" => RightBrace,
"(" => LeftParen,
")" => RightParen,
"[" => LeftBracket,
"]" => RightBracket,
"+" => Plus,
"-" => Minus,
"*" => Multiply,
"/" => Divide,
";" => SemiColon,
":" => Colon,
"::" => DoubleColon,
"," => Comma,
"." => Period,
"#{" => MapStart,
"=" => Equals,
"true" => True,
"false" => False,
"let" => Let,
"const" => Const,
"if" => If,
"else" => Else,
"while" => While,
"loop" => Loop,
"for" => For,
"in" => In,
"<" => LessThan,
">" => GreaterThan,
"!" => Bang,
"<=" => LessThanEqualsTo,
">=" => GreaterThanEqualsTo,
"==" => EqualsTo,
"!=" => NotEqualsTo,
"|" => Pipe,
"||" => Or,
"&" => Ampersand,
"&&" => And,
#[cfg(not(feature = "no_function"))]
"fn" => Fn,
"continue" => Continue,
"break" => Break,
"return" => Return,
"throw" => Throw,
"+=" => PlusAssign,
"-=" => MinusAssign,
"*=" => MultiplyAssign,
"/=" => DivideAssign,
"<<=" => LeftShiftAssign,
">>=" => RightShiftAssign,
"&=" => AndAssign,
"|=" => OrAssign,
"^=" => XOrAssign,
"<<" => LeftShift,
">>" => RightShift,
"^" => XOr,
"%" => Modulo,
"%=" => ModuloAssign,
"~" => PowerOf,
"~=" => PowerOfAssign,
#[cfg(not(feature = "no_function"))]
"private" => Private,
#[cfg(not(feature = "no_module"))]
"import" => Import,
#[cfg(not(feature = "no_module"))]
"export" => Export,
#[cfg(not(feature = "no_module"))]
"as" => As,
"===" | "!==" | "->" | "<-" | "=>" | ":=" | "::<" | "(*" | "*)" | "#" => {
Reserved(syntax.into())
}
_ => return None,
})
}
// Is this token EOF?
pub fn is_eof(&self) -> bool {
use Token::*;
@@ -628,9 +709,9 @@ pub fn parse_string_literal(
}
/// Consume the next character.
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) {
stream.get_next();
fn eat_next(stream: &mut impl InputStream, pos: &mut Position) -> Option<char> {
pos.advance();
stream.get_next()
}
/// Scan for a block comment until the end.
@@ -858,35 +939,8 @@ fn get_next_token_inner(
}
return Some((
match identifier.as_str() {
"true" => Token::True,
"false" => Token::False,
"let" => Token::Let,
"const" => Token::Const,
"if" => Token::If,
"else" => Token::Else,
"while" => Token::While,
"loop" => Token::Loop,
"continue" => Token::Continue,
"break" => Token::Break,
"return" => Token::Return,
"throw" => Token::Throw,
"for" => Token::For,
"in" => Token::In,
#[cfg(not(feature = "no_function"))]
"private" => Token::Private,
#[cfg(not(feature = "no_module"))]
"import" => Token::Import,
#[cfg(not(feature = "no_module"))]
"export" => Token::Export,
#[cfg(not(feature = "no_module"))]
"as" => Token::As,
#[cfg(not(feature = "no_function"))]
"fn" => Token::Fn,
_ => Token::Identifier(identifier),
},
Token::lookup_from_syntax(&identifier)
.unwrap_or_else(|| Token::Identifier(identifier)),
start_pos,
));
}
@@ -947,6 +1001,7 @@ fn get_next_token_inner(
eat_next(stream, pos);
return Some((Token::MapStart, start_pos));
}
('#', _) => return Some((Token::Reserved("#".into()), start_pos)),
// Operators
('+', '=') => {
@@ -1163,40 +1218,42 @@ fn get_next_token_inner(
}
/// A type that implements the `InputStream` trait.
/// Multiple charaacter streams are jointed together to form one single stream.
/// Multiple character streams are jointed together to form one single stream.
pub struct MultiInputsStream<'a> {
/// The input character streams.
streams: StaticVec<Peekable<Chars<'a>>>,
/// The current stream index.
index: usize,
}
impl InputStream for MultiInputsStream<'_> {
/// Get the next character
fn get_next(&mut self) -> Option<char> {
loop {
if self.streams.is_empty() {
if self.index >= self.streams.len() {
// No more streams
return None;
} else if let Some(ch) = self.streams[0].next() {
} else if let Some(ch) = self.streams[self.index].next() {
// Next character in current stream
return Some(ch);
} else {
// Jump to the next stream
let _ = self.streams.remove(0);
self.index += 1;
}
}
}
/// Peek the next character
fn peek_next(&mut self) -> Option<char> {
loop {
if self.streams.is_empty() {
if self.index >= self.streams.len() {
// No more streams
return None;
} else if let Some(ch) = self.streams[0].peek() {
} else if let Some(&ch) = self.streams[self.index].peek() {
// Next character in current stream
return Some(*ch);
return Some(ch);
} else {
// Jump to the next stream
let _ = self.streams.remove(0);
self.index += 1;
}
}
}
@@ -1252,7 +1309,11 @@ impl<'a> Iterator for TokenIterator<'a, '_> {
.to_string(),
))),
"(*" | "*)" => Token::LexError(Box::new(LERR::ImproperSymbol(
"'(* .. *)' is not a valid comment style. This is not Pascal! Should it be '/* .. */'?"
"'(* .. *)' is not a valid comment format. This is not Pascal! Should it be '/* .. */'?"
.to_string(),
))),
"#" => Token::LexError(Box::new(LERR::ImproperSymbol(
"'#' is not a valid symbol. Should it be '#{'?"
.to_string(),
))),
token => Token::LexError(Box::new(LERR::ImproperSymbol(
@@ -1298,6 +1359,7 @@ pub fn lex<'a, 'e>(input: &'a [&'a str], engine: &'e Engine) -> TokenIterator<'a
pos: Position::new(1, 0),
stream: MultiInputsStream {
streams: input.iter().map(|s| s.chars().peekable()).collect(),
index: 0,
},
}
}