feat: add base parser

Signed-off-by: kjuulh <contact@kjuulh.io>

feat: with basic assignment

Signed-off-by: kjuulh <contact@kjuulh.io>

feat: remove target

Signed-off-by: kjuulh <contact@kjuulh.io>
This commit is contained in:
2023-07-03 20:16:09 +02:00
commit 67e3f73ab4
18 changed files with 4113 additions and 0 deletions

328
src/grammar.json Normal file
View File

@@ -0,0 +1,328 @@
{
"name": "rhai",
"word": "identifier",
"rules": {
"source_file": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "_statement"
}
},
"_statement": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "expression_statement"
},
{
"type": "SYMBOL",
"name": "_declaration_statement"
}
]
},
"expression_statement": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "SYMBOL",
"name": "_expression"
},
{
"type": "STRING",
"value": ";"
}
]
}
]
},
"_declaration_statement": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "let_declaration"
}
]
},
"let_declaration": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "let"
},
{
"type": "FIELD",
"name": "pattern",
"content": {
"type": "SYMBOL",
"name": "_pattern"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "="
},
{
"type": "FIELD",
"name": "value",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
}
]
},
{
"type": "BLANK"
}
]
},
{
"type": "STRING",
"value": ";"
}
]
},
"_pattern": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "_literal_pattern"
},
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "STRING",
"value": "_"
}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "assignment_expression"
},
{
"type": "SYMBOL",
"name": "_literal"
},
{
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"assignment_expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "left",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
},
{
"type": "STRING",
"value": "="
},
{
"type": "FIELD",
"name": "right",
"content": {
"type": "SYMBOL",
"name": "_expression"
}
}
]
}
},
"_literal": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string_literal"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "integer_literal"
}
]
},
"_literal_pattern": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string_literal"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "integer_literal"
}
]
},
"integer_literal": {
"type": "TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[0-9][0-9_]*"
}
]
}
]
}
},
"string_literal": {
"type": "SEQ",
"members": [
{
"type": "ALIAS",
"content": {
"type": "PATTERN",
"value": "b?\""
},
"named": false,
"value": "\""
},
{
"type": "REPEAT",
"content": {
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "escape_sequence"
},
{
"type": "SYMBOL",
"name": "_string_content"
}
]
}
},
{
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "STRING",
"value": "\""
}
}
]
},
"escape_sequence": {
"type": "IMMEDIATE_TOKEN",
"content": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "\\"
},
{
"type": "CHOICE",
"members": [
{
"type": "PATTERN",
"value": "[^xu]"
},
{
"type": "PATTERN",
"value": "u[0-9a-fA-F]{4}"
},
{
"type": "PATTERN",
"value": "u{[0-9a-fA-F]+}"
},
{
"type": "PATTERN",
"value": "x[0-9a-fA-F]{2}"
}
]
}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "(r#)?[_\\p{XID_Start}][_\\p{XID_Continue}]*"
}
},
"extras": [
{
"type": "PATTERN",
"value": "\\s"
}
],
"conflicts": [],
"precedences": [],
"externals": [
{
"type": "SYMBOL",
"name": "_string_content"
},
{
"type": "SYMBOL",
"name": "raw_string_literal"
},
{
"type": "SYMBOL",
"name": "float_literal"
},
{
"type": "SYMBOL",
"name": "block_comment"
}
],
"inline": [
"_declaration_statement"
],
"supertypes": [
"_expression",
"_literal",
"_literal_pattern",
"_declaration_statement",
"_pattern"
]
}

221
src/node-types.json Normal file
View File

@@ -0,0 +1,221 @@
[
{
"type": "_declaration_statement",
"named": true,
"subtypes": [
{
"type": "let_declaration",
"named": true
}
]
},
{
"type": "_expression",
"named": true,
"subtypes": [
{
"type": "_literal",
"named": true
},
{
"type": "assignment_expression",
"named": true
},
{
"type": "identifier",
"named": true
}
]
},
{
"type": "_literal",
"named": true,
"subtypes": [
{
"type": "integer_literal",
"named": true
},
{
"type": "raw_string_literal",
"named": true
},
{
"type": "string_literal",
"named": true
}
]
},
{
"type": "_literal_pattern",
"named": true,
"subtypes": [
{
"type": "integer_literal",
"named": true
},
{
"type": "raw_string_literal",
"named": true
},
{
"type": "string_literal",
"named": true
}
]
},
{
"type": "_pattern",
"named": true,
"subtypes": [
{
"type": "_",
"named": false
},
{
"type": "_literal_pattern",
"named": true
},
{
"type": "identifier",
"named": true
}
]
},
{
"type": "assignment_expression",
"named": true,
"fields": {
"left": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
},
"right": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
}
}
},
{
"type": "expression_statement",
"named": true,
"fields": {},
"children": {
"multiple": false,
"required": true,
"types": [
{
"type": "_expression",
"named": true
}
]
}
},
{
"type": "let_declaration",
"named": true,
"fields": {
"pattern": {
"multiple": false,
"required": true,
"types": [
{
"type": "_pattern",
"named": true
}
]
},
"value": {
"multiple": false,
"required": false,
"types": [
{
"type": "_expression",
"named": true
}
]
}
}
},
{
"type": "source_file",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "_declaration_statement",
"named": true
},
{
"type": "expression_statement",
"named": true
}
]
}
},
{
"type": "string_literal",
"named": true,
"fields": {},
"children": {
"multiple": true,
"required": false,
"types": [
{
"type": "escape_sequence",
"named": true
}
]
}
},
{
"type": "\"",
"named": false
},
{
"type": ";",
"named": false
},
{
"type": "=",
"named": false
},
{
"type": "_",
"named": false
},
{
"type": "escape_sequence",
"named": true
},
{
"type": "identifier",
"named": true
},
{
"type": "integer_literal",
"named": true
},
{
"type": "let",
"named": false
},
{
"type": "raw_string_literal",
"named": true
}
]

2663
src/parser.c Normal file

File diff suppressed because it is too large Load Diff

191
src/scanner.c Normal file
View File

@@ -0,0 +1,191 @@
#include <tree_sitter/parser.h>
#include <wctype.h>
enum TokenType {
STRING_CONTENT,
RAW_STRING_LITERAL,
FLOAT_LITERAL,
BLOCK_COMMENT,
};
void *tree_sitter_rhai_external_scanner_create() { return NULL; }
void tree_sitter_rhai_external_scanner_destroy(void *p) {}
void tree_sitter_rhai_external_scanner_reset(void *p) {}
unsigned tree_sitter_rhai_external_scanner_serialize(void *p, char *buffer) {
return 0;
}
void tree_sitter_rhai_external_scanner_deserialize(void *p, const char *b,
unsigned n) {}
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
static bool is_num_char(int32_t c) { return c == '_' || iswdigit(c); }
bool tree_sitter_rhai_external_scanner_scan(void *payload, TSLexer *lexer,
const bool *valid_symbols) {
if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
bool has_content = false;
for (;;) {
if (lexer->lookahead == '\"' || lexer->lookahead == '\\') {
break;
} else if (lexer->lookahead == 0) {
return false;
}
has_content = true;
advance(lexer);
}
lexer->result_symbol = STRING_CONTENT;
return has_content;
}
while (iswspace(lexer->lookahead))
lexer->advance(lexer, true);
if (valid_symbols[RAW_STRING_LITERAL] &&
(lexer->lookahead == 'r' || lexer->lookahead == 'b')) {
lexer->result_symbol = RAW_STRING_LITERAL;
if (lexer->lookahead == 'b')
advance(lexer);
if (lexer->lookahead != 'r')
return false;
advance(lexer);
unsigned opening_hash_count = 0;
while (lexer->lookahead == '#') {
advance(lexer);
opening_hash_count++;
}
if (lexer->lookahead != '"')
return false;
advance(lexer);
for (;;) {
if (lexer->lookahead == 0) {
return false;
} else if (lexer->lookahead == '"') {
advance(lexer);
unsigned hash_count = 0;
while (lexer->lookahead == '#' && hash_count < opening_hash_count) {
advance(lexer);
hash_count++;
}
if (hash_count == opening_hash_count) {
return true;
}
} else {
advance(lexer);
}
}
}
if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
lexer->result_symbol = FLOAT_LITERAL;
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
bool has_fraction = false, has_exponent = false;
if (lexer->lookahead == '.') {
has_fraction = true;
advance(lexer);
if (iswalpha(lexer->lookahead)) {
// The dot is followed by a letter: 1.max(2) => not a float but an
// integer
return false;
}
if (lexer->lookahead == '.') {
return false;
}
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
}
lexer->mark_end(lexer);
if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
has_exponent = true;
advance(lexer);
if (lexer->lookahead == '+' || lexer->lookahead == '-') {
advance(lexer);
}
if (!is_num_char(lexer->lookahead)) {
return true;
}
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
}
if (!has_exponent && !has_fraction)
return false;
if (lexer->lookahead != 'u' && lexer->lookahead != 'i' &&
lexer->lookahead != 'f') {
return true;
}
advance(lexer);
if (!iswdigit(lexer->lookahead)) {
return true;
}
while (iswdigit(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
return true;
}
if (lexer->lookahead == '/') {
advance(lexer);
if (lexer->lookahead != '*')
return false;
advance(lexer);
bool after_star = false;
unsigned nesting_depth = 1;
for (;;) {
switch (lexer->lookahead) {
case '\0':
return false;
case '*':
advance(lexer);
after_star = true;
break;
case '/':
if (after_star) {
advance(lexer);
after_star = false;
nesting_depth--;
if (nesting_depth == 0) {
lexer->result_symbol = BLOCK_COMMENT;
return true;
}
} else {
advance(lexer);
after_star = false;
if (lexer->lookahead == '*') {
nesting_depth++;
advance(lexer);
}
}
break;
default:
advance(lexer);
after_star = false;
break;
}
}
}
return false;
}

224
src/tree_sitter/parser.h Normal file
View File

@@ -0,0 +1,224 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
typedef uint16_t TSStateId;
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/*
* Lexer Macros
*/
#define START_LEXER() \
bool result = false; \
bool skip = false; \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = state_value, \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_val, child_count_val, ...) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_