chore: scaffold tree-sitter-rune project

This commit is contained in:
2026-03-27 09:42:45 +01:00
commit 64bfd3066e
19 changed files with 5466 additions and 0 deletions
+15
View File
@@ -0,0 +1,15 @@
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 2
indent_style = space
insert_final_newline = true
trim_trailing_whitespace = true
[*.scm]
indent_size = 2
[Makefile]
indent_style = tab
+40
View File
@@ -0,0 +1,40 @@
# Rust artifacts
target/
# Node artifacts
build/
prebuilds/
node_modules/
# Swift artifacts
.build/
# Go artifacts
_obj/
# Python artifacts
.venv/
dist/
*.egg-info
*.whl
# C artifacts
*.a
*.so
*.so.*
*.dylib
*.dll
*.pc
# Example dirs
/examples/*/
# Grammar volatiles
*.wasm
*.obj
*.o
# Archives
*.tar.gz
*.tgz
*.zip
+32
View File
@@ -0,0 +1,32 @@
[package]
name = "tree-sitter-rune"
version = "0.1.0"
description = "Rune grammar for tree-sitter"
authors = []
license = "MIT"
edition = "2021"
rust-version = "1.65"
keywords = ["parser", "tree-sitter", "rune"]
include = [
"bindings/rust/*",
"queries/*",
"src/*",
"grammar.js",
"tree-sitter.json",
]
[lib]
path = "bindings/rust/lib.rs"
[[example]]
name = "parse"
path = "bindings/rust/examples/parse.rs"
[dependencies]
tree-sitter-language = "0.1"
[build-dependencies]
cc = "1.1"
[dev-dependencies]
tree-sitter = "0.25"
+30
View File
@@ -0,0 +1,30 @@
{
"targets": [
{
"target_name": "tree_sitter_rune_binding",
"dependencies": [
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
],
"include_dirs": [
"src",
],
"sources": [
"bindings/node/binding.cc",
"src/parser.c",
"src/scanner.c",
],
"conditions": [
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
}
]
}
+20
View File
@@ -0,0 +1,20 @@
#include <napi.h>
typedef struct TSLanguage TSLanguage;
extern "C" TSLanguage *tree_sitter_rune();
// "tree-sitter", "language" hashed with BLAKE2
const napi_type_tag LANGUAGE_TYPE_TAG = {
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
};
// Module initializer: publishes the grammar name and an External wrapping the
// TSLanguage pointer returned by tree_sitter_rune(), tagged with
// LANGUAGE_TYPE_TAG.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
    exports.Set("name", Napi::String::New(env, "rune"));

    Napi::External<TSLanguage> handle =
        Napi::External<TSLanguage>::New(env, tree_sitter_rune());
    handle.TypeTag(&LANGUAGE_TYPE_TAG);
    exports.Set("language", handle);

    return exports;
}
NODE_API_MODULE(tree_sitter_rune_binding, Init)
+32
View File
@@ -0,0 +1,32 @@
/** Minimal description of a node kind: its `type` string and whether it is named. */
type BaseNode = {
type: string;
named: boolean;
};
/** Describes a child slot: the node kinds it accepts and its multiplicity/required flags. */
type ChildNode = {
multiple: boolean;
required: boolean;
types: BaseNode[];
};
/**
 * One entry of the node-type information. It takes one of two shapes:
 * a node kind listing its `subtypes`, or a node kind listing its named
 * `fields` and its `children`.
 */
type NodeInfo =
| {
type: string;
named: boolean;
subtypes: BaseNode[];
}
| {
type: string;
named: boolean;
fields: { [name: string]: ChildNode };
children: ChildNode[];
};
/** Shape of the object exported by this package's Node binding. */
type Language = {
name: string;
language: unknown;
nodeTypeInfo: NodeInfo[];
};
declare const language: Language;
export = language;
+29
View File
@@ -0,0 +1,29 @@
// Package root: two directories above this file.
const root = require("path").join(__dirname, "..", "..");

if (process.versions?.bun) {
  // Under Bun, locate the compiled addon by hand instead of going through
  // node-gyp-build.
  const fs = require("fs");
  const os = require("os");
  const path = require("path");

  const bindingName = "tree_sitter_rune_binding";
  const platformFolder = `${os.platform()}-${os.arch()}`;
  const prebuildsDir = path.join(root, "prebuilds", platformFolder);

  // `module.exports` starts out as `{}`, which is truthy, so it cannot be used
  // to tell whether a prebuild was loaded. Track the result explicitly.
  let binding = null;
  if (fs.existsSync(prebuildsDir)) {
    const nodeFile = fs
      .readdirSync(prebuildsDir)
      .find((f) => f.endsWith(".node"));
    if (nodeFile) {
      binding = require(path.join(prebuildsDir, nodeFile));
    }
  }

  // No prebuild for this platform: fall back to the locally compiled addon.
  module.exports =
    binding ?? require(`${root}/build/Release/${bindingName}.node`);
} else {
  module.exports = require("node-gyp-build")(root);
}

// Best effort: attach the static node-type information when the file exists.
try {
  module.exports.nodeTypeInfo = require("../../src/node-types.json");
} catch (_) {}
@@ -0,0 +1,26 @@
from importlib.resources import files
from ._binding import language
def _get_query(name, filename):
    """Return the text of a bundled query file, caching it as a module global.

    Args:
        name: Module-level attribute name under which the text is cached.
        filename: File name inside this package's ``queries`` directory.

    Returns:
        The query source as a string.
    """
    cache = globals()
    if name in cache:
        return cache[name]
    resource = files(__name__).joinpath("queries").joinpath(filename)
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which is what read_text() uses when no encoding is given.
    query = resource.read_text(encoding="utf-8")
    cache[name] = query
    return query
def __getattr__(name):
    """Resolve the lazily loaded query attributes of this module.

    ``HIGHLIGHTS_QUERY`` and ``TAGS_QUERY`` are loaded through ``_get_query``;
    any other name raises ``AttributeError``.
    """
    lazy_queries = {
        "HIGHLIGHTS_QUERY": "highlights.scm",
        "TAGS_QUERY": "tags.scm",
    }
    filename = lazy_queries.get(name)
    if filename is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _get_query(name, filename)
def __dir__():
    """List the module's attributes, always including the lazy query names.

    A set is used because a query that has already been loaded is also present
    in ``globals()`` and would otherwise appear twice in the result.
    """
    return sorted({*globals(), "HIGHLIGHTS_QUERY", "TAGS_QUERY"})
__all__ = ["language", "HIGHLIGHTS_QUERY", "TAGS_QUERY"]
+19
View File
@@ -0,0 +1,19 @@
/// Build script: compiles `src/parser.c` and `src/scanner.c` with the `cc`
/// crate and asks Cargo to rerun the script whenever either file changes.
fn main() {
    let src_dir = std::path::Path::new("src");

    let mut build = cc::Build::new();
    build.std("c11");
    build.include(src_dir);
    #[cfg(target_env = "msvc")]
    build.flag("-utf-8");

    for file_name in ["parser.c", "scanner.c"] {
        let source = src_dir.join(file_name);
        build.file(&source);
        println!("cargo:rerun-if-changed={}", source.to_str().unwrap());
    }

    build.compile("tree-sitter-rune");
}
+30
View File
@@ -0,0 +1,30 @@
use tree_sitter_language::LanguageFn;
// Language constructor with C linkage.
// NOTE(review): presumably defined by the generated `src/parser.c` that
// `build.rs` compiles — confirm once the parser is generated.
extern "C" {
fn tree_sitter_rune() -> *const ();
}
// SAFETY: NOTE(review) — relies on `tree_sitter_rune` matching the function
// signature `LanguageFn::from_raw` expects; confirm against the
// `tree-sitter-language` documentation.
/// The tree-sitter [`LanguageFn`] for this grammar.
pub static LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_rune) };
/// The content of the [`node-types.json`] file for this grammar.
///
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
/// The syntax highlighting query for this language.
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
/// The tagging query for this language.
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
#[cfg(test)]
mod tests {
    /// Smoke test: the exported language can be installed on a fresh parser.
    #[test]
    fn test_can_load_grammar() {
        let language: tree_sitter::Language = super::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        let outcome = parser.set_language(&language);
        outcome.expect("Error loading Rune grammar");
    }
}
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,252 @@
# tree-sitter-rune Design Spec
## Overview
A tree-sitter grammar for the [Rune](https://rune-rs.github.io/) programming language, targeting integration with nvim-treesitter for syntax highlighting, indentation, code folding, and scope tracking in Neovim.
Rune is a dynamically-typed, Rust-inspired scripting language designed for embedding in Rust applications. It uses the `.rn` file extension.
## Approach
Fork [tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) and modify:
- **Remove** ~30-40% of rules related to Rust's type system, ownership model, and static typing
- **Add** Rune-specific constructs (template literals, generators, select blocks, dynamic typing)
- **Adapt** shared constructs (functions, control flow, patterns) to Rune's untyped signatures
This leverages tree-sitter-rust's battle-tested expression parsing, operator precedence, and string handling while producing a clean grammar that maps 1:1 to Rune's actual syntax.
## Repository
- **Location:** `~/ssd/git/active/tree-sitter-rune`
- **License:** MIT
- **Target:** nvim-treesitter submission, npm and crates.io publishing
## Project Structure
```
tree-sitter-rune/
├── grammar.js # Grammar definition
├── package.json # Node project config with tree-sitter field
├── Cargo.toml # Rust bindings
├── binding.gyp # Node native addon build
├── bindings/ # Language bindings (node, rust)
├── src/ # Generated parser (not hand-edited)
│ ├── parser.c
│ ├── scanner.c # External scanner for template literals
│ └── ...
├── queries/
│ ├── highlights.scm # Syntax highlighting captures
│ ├── indents.scm # Auto-indentation rules
│ ├── folds.scm # Code folding regions
│ └── locals.scm # Scope/variable tracking
├── test/
│ └── corpus/ # Test cases (input -> expected tree)
│ ├── declarations.txt
│ ├── expressions.txt
│ ├── literals.txt
│ ├── patterns.txt
│ ├── control_flow.txt
│ ├── async.txt
│ ├── generators.txt
│ └── objects.txt
├── examples/ # Real .rn files for parse validation
├── LICENSE
└── README.md
```
## Grammar: Removals from tree-sitter-rust
These Rust constructs do not exist in Rune and are removed entirely:
| Construct | Reason |
|---|---|
| Lifetime annotations (`'a`, `'static`) | No borrow checker |
| Type annotations on params/returns | Dynamically typed |
| Generic type parameters (`<T, U>`) | No generics |
| Trait definitions and `impl Trait` | No trait system |
| `impl` blocks | No type-level methods |
| Borrow/reference syntax (`&`, `&mut`) | No ownership model |
| `where` clauses | No type constraints |
| `unsafe` blocks | No unsafe |
| `extern` blocks / FFI | Embedding is Rust-side |
| `const` / `static` items | Not in Rune |
| Type aliases (`type Foo = ...`) | No type system |
| Union types | Not in Rune |
| `dyn` / `Box` / smart pointer syntax | Runtime-managed |
| Turbofish (`::<Type>`) | No generics |
## Grammar: Additions for Rune
| Construct | Details |
|---|---|
| Template literals | Backtick strings with `${expr}` interpolation. Requires an external scanner for nested balanced expressions. Reference: tree-sitter-javascript's template literal scanner. |
| `select` blocks | `select { branch => expr, ... }` for async concurrency |
| `yield` expressions | `yield value` for generators |
| Untyped function params | `fn foo(a, b)` — parameter names only, no type annotations |
| Untyped struct fields | `struct Foo { x, y }` — field names only |
| Untyped enum variants | `enum E { Ok(value), Err(msg) }` — positional names, no types |
| Async closures | `async \|\| { ... }` and `async \|x\| { ... }` |
| Object literals | `#{ key: value }` syntax |
| `is` / `is not` operators | Runtime type checking |
| Byte literals | `b'x'` and `b"string"` |
## Grammar: Shared Constructs (Keep, Adapt)
These exist in both Rust and Rune with minor differences:
- **Functions:** Keep `fn`, `pub fn`, `async fn`. Remove return type annotations.
- **Control flow:** `if`/`else`, `match`, `loop`, `while`, `for`, `break`, `continue`, `return` — identical syntax.
- **Pattern matching:** Literal, tuple, struct, enum, wildcard, rest (`..`), guard clauses — same as Rust minus type patterns.
- **Closures:** `|args| expr` — same syntax, no type annotations on params.
- **Modules:** `mod`, `use`, `self`, `crate`, `super` — same as Rust.
- **Visibility:** `pub`, `pub(crate)`, `pub(super)`, `pub(self)` — same as Rust.
- **Operators:** Arithmetic, comparison, logical, assignment — same as Rust plus `is`/`is not`.
- **Comments:** `//` line and `/* */` block — same as Rust.
- **String literals:** Double-quoted strings with escape sequences — same as Rust.
## External Scanner
An external scanner (`src/scanner.c`) is needed for template literal parsing. The scanner must:
1. Recognize backtick (`` ` ``) as the start/end of a template literal
2. Track `${` as the start of an interpolation
3. Handle nested braces within interpolations (balanced `{}` counting)
4. Return to template literal content after `}` closes the interpolation
Reference implementation: [tree-sitter-javascript's scanner.c](https://github.com/tree-sitter/tree-sitter-javascript/blob/master/src/scanner.c) handles the same construct.
## Query Files
### highlights.scm
Key capture groups:
- `@keyword` — `fn`, `let`, `pub`, `use`, `mod`, `if`, `else`, `match`, `loop`, `while`, `for`, `return`, `async`, `await`, `yield`, `select`, `break`, `continue`, `in`, `is`
- `@function` — function definitions and calls
- `@function.method` — method calls (chained `.method()`)
- `@string` — regular strings and template literal content
- `@string.special` — `${...}` interpolation delimiters
- `@variable` — identifiers
- `@variable.parameter` — function parameters
- `@operator` — binary/unary operators
- `@type` — struct/enum names (PascalCase convention)
- `@module` — module paths
- `@comment` — line and block comments
- `@punctuation.bracket` — `()`, `[]`, `{}`
- `@punctuation.delimiter` — `,`, `;`, `::`
- `@constant.builtin` — `true`, `false`, `None`
- `@number` — integer and float literals
### indents.scm
Block-based indentation for `{}` bodies: functions, control flow, match arms, struct/enum definitions, closures, block expressions.
### folds.scm
Foldable regions: function bodies, struct/enum bodies, match blocks, block expressions, multi-line comment blocks.
### locals.scm
Scope definitions: function bodies, block expressions, closures, for/while/loop bodies.
Variable definitions: `let` bindings, function parameters, for-loop variables.
References: identifier usage.
## Formatter Integration
No custom formatter — Rune ships `rune fmt` since v0.13. Document integration with conform.nvim:
```lua
require("conform").setup({
formatters_by_ft = {
rune = { "rune_fmt" },
},
formatters = {
rune_fmt = {
command = "rune",
args = { "fmt", "$FILENAME" },
stdin = false,
},
},
})
```
This is documented in the README, not shipped as part of the grammar repo.
## Testing Strategy
### Test Corpus
Each `.txt` file in `test/corpus/` covers a category of syntax using tree-sitter's standard format:
```
==================
Test name
==================
source code here
---
(expected_syntax_tree)
```
**Categories:**
- `declarations.txt` — fn, struct, enum, mod, use, visibility modifiers
- `expressions.txt` — binary, unary, call, method chain, field access, index, range
- `literals.txt` — strings, template literals, numbers, booleans, byte literals, object literals
- `control_flow.txt` — if/else, match with guards, loops (loop/while/for), break/continue/return
- `patterns.txt` — match patterns (literal, tuple, struct, enum, wildcard, rest, or-patterns)
- `async.txt` — async fn, .await, select blocks, async closures
- `generators.txt` — yield expressions, generator functions, streams
- `objects.txt` — object literals (`#{}`), vector literals
### Real-World Validation
Parse existing `.rn` files to catch gaps:
- `~/.config/owlry/plugins/hyprshutdown/main.rn`
- Future owlry plugin files as they're written
### Validation Criteria
- All test corpus files pass `tree-sitter test`
- Real-world `.rn` files parse without errors
- Highlight queries produce sensible coloring in Neovim
- No parser crashes on malformed input (graceful error recovery)
## Development Phases
### Phase 1: Core Grammar
Fork tree-sitter-rust, strip type system rules, adapt function/struct/enum definitions to Rune's untyped signatures. Target: parse `main.rn` successfully.
### Phase 2: Rune-Specific Constructs
Add template literals (external scanner), select blocks, yield expressions, async closures, object literals, `is`/`is not`. Build test corpus alongside.
### Phase 3: Query Files
Write `highlights.scm`, `indents.scm`, `folds.scm`, `locals.scm`. Validate in Neovim with real files.
### Phase 4: Polish & Publish
- Complete test corpus coverage
- README with installation and usage instructions
- MIT LICENSE
- Submit PR to nvim-treesitter to register parser
- Publish to npm (`tree-sitter-rune`) and crates.io
## nvim-treesitter Submission Requirements
- Grammar hosted on public Git repository
- `highlights.scm` included at minimum
- Parser handles real-world code without crashing
- `package.json` contains `tree-sitter` field with grammar metadata
- Maintained and responsive to issues
## Dependencies
- **Build time:** Node.js (for `tree-sitter generate`), tree-sitter CLI, C compiler
- **Runtime:** None (compiled to a `.so` shared library)
- **Formatter:** `rune` CLI (installed separately by the user)
+1695
View File
File diff suppressed because it is too large Load Diff
+63
View File
@@ -0,0 +1,63 @@
{
"name": "tree-sitter-rune",
"version": "0.1.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "tree-sitter-rune",
"version": "0.1.0",
"hasInstallScript": true,
"license": "MIT",
"dependencies": {
"node-addon-api": "^8.2.2",
"node-gyp-build": "^4.8.4"
},
"devDependencies": {
"tree-sitter-cli": "^0.26.7"
},
"peerDependencies": {
"tree-sitter": "^0.25.0"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
}
},
"node_modules/node-addon-api": {
"version": "8.7.0",
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz",
"integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==",
"license": "MIT",
"engines": {
"node": "^18 || ^20 || >= 21"
}
},
"node_modules/node-gyp-build": {
"version": "4.8.4",
"resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz",
"integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==",
"license": "MIT",
"bin": {
"node-gyp-build": "bin.js",
"node-gyp-build-optional": "optional.js",
"node-gyp-build-test": "build-test.js"
}
},
"node_modules/tree-sitter-cli": {
"version": "0.26.7",
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.26.7.tgz",
"integrity": "sha512-fOg/DokJr/gW7suy9IypT1MQon28+JxOmtjudrT04rHRyqVJzrvakrojwyU8r0U6UNRsZKilb8VIhyarv2XUkQ==",
"dev": true,
"hasInstallScript": true,
"license": "MIT",
"bin": {
"tree-sitter": "cli.js"
},
"engines": {
"node": ">=12.0.0"
}
}
}
}
+53
View File
@@ -0,0 +1,53 @@
{
"name": "tree-sitter-rune",
"version": "0.1.0",
"description": "Rune grammar for tree-sitter",
"main": "bindings/node",
"types": "bindings/node",
"keywords": [
"parser",
"tree-sitter",
"rune"
],
"files": [
"grammar.js",
"binding.gyp",
"prebuilds/",
"queries/",
"src/",
"bindings/node"
],
"dependencies": {
"node-addon-api": "^8.2.2",
"node-gyp-build": "^4.8.4"
},
"peerDependencies": {
"tree-sitter": "^0.25.0"
},
"peerDependenciesMeta": {
"tree-sitter": {
"optional": true
}
},
"devDependencies": {
"tree-sitter-cli": "^0.26.7"
},
"scripts": {
"install": "node-gyp-build",
"test": "node --test bindings/node/test",
"parse": "tree-sitter parse",
"generate": "tree-sitter generate"
},
"tree-sitter": [
{
"scope": "source.rune",
"file-types": [
"rn"
],
"highlights": "queries/highlights.scm",
"tags": "queries/tags.scm",
"injection-regex": "rune"
}
],
"license": "MIT"
}
+2
View File
@@ -0,0 +1,2 @@
; Highlights for Rune
; (populated in Task 10)
+2
View File
@@ -0,0 +1,2 @@
; Tags for Rune
; (populated in Task 11)
+393
View File
@@ -0,0 +1,393 @@
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
#include <wctype.h>
// Token kinds this external scanner can report through lexer->result_symbol.
// NOTE(review): presumably the order must mirror the `externals` array in
// grammar.js — confirm before reordering or removing entries.
enum TokenType {
STRING_CONTENT,
RAW_STRING_LITERAL_START,
RAW_STRING_LITERAL_CONTENT,
RAW_STRING_LITERAL_END,
FLOAT_LITERAL,
BLOCK_OUTER_DOC_MARKER,
BLOCK_INNER_DOC_MARKER,
BLOCK_COMMENT_CONTENT,
LINE_DOC_CONTENT,
// Never produced as a token; the scan function gives up when it is marked valid.
ERROR_SENTINEL
};
// Scanner state kept between scan calls and saved/restored by the
// serialize/deserialize functions.
typedef struct {
// Number of '#' characters counted in the opening delimiter of a raw string.
uint8_t opening_hash_count;
} Scanner;
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); }
// Writes the scanner state (the single hash-count byte) into `buffer` and
// returns the number of bytes written, which is always 1.
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
    const Scanner *state = (const Scanner *)payload;
    *buffer = (char)state->opening_hash_count;
    return 1;
}
// Restores the scanner state from `buffer`. Exactly one byte is expected (the
// hash count written by serialize); any other length resets the count to 0.
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    Scanner *scanner = (Scanner *)payload;
    if (length == 1) {
        scanner->opening_hash_count = buffer[0];
    } else {
        scanner->opening_hash_count = 0;
    }
}
// True for characters allowed inside a numeric literal: digits and '_'.
static inline bool is_num_char(int32_t c) {
    if (c == '_') {
        return true;
    }
    return iswdigit(c);
}
// Moves past the lookahead character, passing `false` as the skip flag.
static inline void advance(TSLexer *lexer) {
    const bool skip_char = false;
    lexer->advance(lexer, skip_char);
}
// Moves past the lookahead character, passing `true` as the skip flag.
static inline void skip(TSLexer *lexer) {
    const bool skip_char = true;
    lexer->advance(lexer, skip_char);
}
// Consumes string content up to (not including) the next '"' or '\\'.
// Returns false at end of input, and false when no character was consumed;
// otherwise reports STRING_CONTENT.
static inline bool process_string(TSLexer *lexer) {
    bool consumed_any = false;
    while (lexer->lookahead != '\"' && lexer->lookahead != '\\') {
        if (lexer->eof(lexer)) {
            return false;
        }
        advance(lexer);
        consumed_any = true;
    }
    lexer->result_symbol = STRING_CONTENT;
    lexer->mark_end(lexer);
    return consumed_any;
}
// Scans the opening of a raw string: an optional 'b' or 'c' prefix, 'r', any
// number of '#', then '"'. On success the '#' count is stored in the scanner
// and RAW_STRING_LITERAL_START is reported.
static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
    bool has_prefix = lexer->lookahead == 'b' || lexer->lookahead == 'c';
    if (has_prefix) {
        advance(lexer);
    }
    if (lexer->lookahead != 'r') {
        return false;
    }
    advance(lexer);

    uint8_t hashes = 0;
    for (; lexer->lookahead == '#'; hashes++) {
        advance(lexer);
    }

    if (lexer->lookahead != '"') {
        return false;
    }
    advance(lexer);

    scanner->opening_hash_count = hashes;
    lexer->result_symbol = RAW_STRING_LITERAL_START;
    return true;
}
// Scans raw string content up to the closing delimiter: a '"' followed by as
// many '#' as the opening had. The token end is marked just before that '"'.
// Returns false if the input ends first.
static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
    while (!lexer->eof(lexer)) {
        if (lexer->lookahead != '"') {
            advance(lexer);
            continue;
        }

        // Candidate closing quote: remember where the content would end.
        lexer->mark_end(lexer);
        advance(lexer);

        unsigned seen = 0;
        while (seen < scanner->opening_hash_count && lexer->lookahead == '#') {
            advance(lexer);
            seen++;
        }
        if (seen == scanner->opening_hash_count) {
            lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
            return true;
        }
    }
    return false;
}
// Consumes the closing delimiter of a raw string: one character for the quote
// plus one per '#' recorded at the opening, then reports RAW_STRING_LITERAL_END.
static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
    advance(lexer);
    unsigned remaining = scanner->opening_hash_count;
    while (remaining > 0) {
        advance(lexer);
        remaining--;
    }
    lexer->result_symbol = RAW_STRING_LITERAL_END;
    return true;
}
// Scans a float literal starting at a digit. Returns true (FLOAT_LITERAL) only
// when a fractional part and/or an exponent was seen. The token end is set by
// the mark_end() calls below, so characters advanced past after the last
// mark_end() are lookahead only.
static inline bool process_float_literal(TSLexer *lexer) {
lexer->result_symbol = FLOAT_LITERAL;
// Consume the first digit (the caller only dispatches here on a digit), then
// the rest of the integer part.
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
bool has_fraction = false, has_exponent = false;
if (lexer->lookahead == '.') {
has_fraction = true;
advance(lexer);
if (iswalpha(lexer->lookahead)) {
// The dot is followed by a letter: 1.max(2) => not a float but an integer
return false;
}
// A second dot follows the first: not a float (presumably a range such as 1..2).
if (lexer->lookahead == '.') {
return false;
}
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
}
// Integer part plus optional fraction is the token so far.
lexer->mark_end(lexer);
if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
has_exponent = true;
advance(lexer);
if (lexer->lookahead == '+' || lexer->lookahead == '-') {
advance(lexer);
}
// No digit after the exponent marker: accept the token as marked above,
// i.e. without the exponent characters.
if (!is_num_char(lexer->lookahead)) {
return true;
}
advance(lexer);
while (is_num_char(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
}
// Neither fraction nor exponent: this is not a float literal.
if (!has_exponent && !has_fraction) {
return false;
}
// Optional suffix: one of 'u', 'i', 'f' followed by at least one digit.
if (lexer->lookahead != 'u' && lexer->lookahead != 'i' && lexer->lookahead != 'f') {
return true;
}
advance(lexer);
// Suffix letter without digits: keep the token end marked before the letter.
if (!iswdigit(lexer->lookahead)) {
return true;
}
while (iswdigit(lexer->lookahead)) {
advance(lexer);
}
lexer->mark_end(lexer);
return true;
}
// Consumes the rest of the current line as LINE_DOC_CONTENT. The terminating
// newline, when present, is part of the token (useful for markdown injection).
// Always succeeds, including at end of input.
static inline bool process_line_doc_content(TSLexer *lexer) {
    lexer->result_symbol = LINE_DOC_CONTENT;
    while (!lexer->eof(lexer)) {
        bool at_newline = lexer->lookahead == '\n';
        advance(lexer);
        if (at_newline) {
            break;
        }
    }
    return true;
}
// States of the block-comment scanning loop.
typedef enum {
// A '/' was just seen; a following '*' opens a nested block comment.
LeftForwardSlash,
// A '*' was just seen; a following '/' closes one nesting level.
LeftAsterisk,
// No pending '/' or '*'.
Continuing,
} BlockCommentState;
// Working state of the block-comment scanning loop.
typedef struct {
BlockCommentState state;
// Count of block comments currently open; the scanning loop in
// process_block_comment starts it at 1 and stops when it reaches 0.
unsigned nestingDepth;
} BlockCommentProcessing;
// Handles the character that follows a '/': a '*' opens one more nested block
// comment. In every case the state returns to Continuing.
static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) {
    if (current == '*') {
        processing->nestingDepth += 1;
    }
    processing->state = Continuing;
}
// Handles the character that follows a '*'. Another '*' marks the token end
// and keeps the state at LeftAsterisk; a '/' closes one nesting level; anything
// else just returns to Continuing.
static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) {
    switch (current) {
        case '*':
            lexer->mark_end(lexer);
            processing->state = LeftAsterisk;
            break;
        case '/':
            processing->nestingDepth -= 1;
            processing->state = Continuing;
            break;
        default:
            processing->state = Continuing;
            break;
    }
}
// Handles a character seen in the Continuing state: '/' and '*' switch to the
// matching pending state, every other character leaves the state untouched.
static inline void process_continuing(BlockCommentProcessing *processing, char current) {
    if (current == '/') {
        processing->state = LeftForwardSlash;
    } else if (current == '*') {
        processing->state = LeftAsterisk;
    }
}
// Scans inside a block comment. Depending on which symbols are valid it
// reports a doc marker ('!' for inner, a lone '*' for outer) or consumes the
// comment body as BLOCK_COMMENT_CONTENT, tracking nested comments with the
// BlockCommentProcessing state machine. Returns false when nothing applies or
// the comment is empty.
static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
char first = (char)lexer->lookahead;
// The first character is stored so we can safely advance inside
// these if blocks. However, because we only store one, we can only
// safely advance 1 time. Since there's a chance that an advance could
// happen in one state, we must advance in all states to ensure that
// the program ends up in a sane state prior to processing the block
// comment if need be.
if (valid_symbols[BLOCK_INNER_DOC_MARKER] && first == '!') {
lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
advance(lexer);
return true;
}
if (valid_symbols[BLOCK_OUTER_DOC_MARKER] && first == '*') {
advance(lexer);
lexer->mark_end(lexer);
// If the next token is a / that means that it's an empty block comment.
if (lexer->lookahead == '/') {
return false;
}
// If the next token is a * that means that this isn't a BLOCK_OUTER_DOC_MARKER
// as BLOCK_OUTER_DOC_MARKER's only have 2 * not 3 or more.
if (lexer->lookahead != '*') {
lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
return true;
}
} else {
advance(lexer);
}
if (valid_symbols[BLOCK_COMMENT_CONTENT]) {
// Depth starts at 1: the loop below runs until that count drops to 0.
BlockCommentProcessing processing = {Continuing, 1};
// Manually set the current state based on the first character
switch (first) {
case '*':
processing.state = LeftAsterisk;
if (lexer->lookahead == '/') {
// This case can happen in an empty doc block comment
// like /*!*/. The comment has no contents, so bail.
return false;
}
break;
case '/':
processing.state = LeftForwardSlash;
break;
default:
processing.state = Continuing;
break;
}
// For the purposes of actually parsing rust code, this
// is incorrect as it considers an unterminated block comment
// to be an error. However, for the purposes of syntax highlighting
// this should be considered successful as otherwise you are not able
// to syntax highlight a block of code prior to closing the
// block comment
while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
// Set first to the current lookahead as that is the second character
// as we force an advance in the above code when we are checking if we
// need to handle a block comment inner or outer doc comment signifier
// node
first = (char)lexer->lookahead;
switch (processing.state) {
case LeftForwardSlash:
process_left_forward_slash(&processing, first);
break;
case LeftAsterisk:
process_left_asterisk(&processing, first, lexer);
break;
case Continuing:
lexer->mark_end(lexer);
process_continuing(&processing, first);
break;
default:
break;
}
advance(lexer);
// After consuming a '/' while the comment is still open, move the token
// end past it.
if (first == '/' && processing.nestingDepth != 0) {
lexer->mark_end(lexer);
}
}
lexer->result_symbol = BLOCK_COMMENT_CONTENT;
return true;
}
return false;
}
// Entry point of the external scanner. Dispatches to the helper for the first
// applicable group of valid symbols, in this order: block comments, string
// content, line doc content, then (after skipping whitespace) raw string
// start/content/end and float literals. Returns false when nothing matches.
// NOTE(review): the `tree_sitter_rust_` prefix looks inherited from
// tree-sitter-rust; the bindings in this project reference `tree_sitter_rune`,
// so the generated parser presumably expects `tree_sitter_rune_external_scanner_scan`
// — confirm against grammar.js / parser.c.
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
// The documentation states that if the lexical analysis fails for some reason
// they will mark every state as valid and pass it to the external scanner
// However, we can't do anything to help them recover in that case so we
// should just fail.
/*
link: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
If a syntax error is encountered during regular parsing, Tree-sitters
first action during error recovery will be to call the external scanners
scan function with all tokens marked valid. The scanner should detect this
case and handle it appropriately. One simple method of detection is to add
an unused token to the end of the externals array, for example
externals: $ => [$.token1, $.token2, $.error_sentinel],
then check whether that token is marked valid to determine whether
Tree-sitter is in error correction mode.
*/
if (valid_symbols[ERROR_SENTINEL]) {
return false;
}
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[BLOCK_COMMENT_CONTENT] || valid_symbols[BLOCK_INNER_DOC_MARKER] ||
valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
return process_block_comment(lexer, valid_symbols);
}
// String content is only scanned when a float literal is not also valid.
if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
return process_string(lexer);
}
if (valid_symbols[LINE_DOC_CONTENT]) {
return process_line_doc_content(lexer);
}
// The remaining tokens may be preceded by whitespace, which is skipped here.
while (iswspace(lexer->lookahead)) {
skip(lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_START] &&
(lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
return scan_raw_string_start(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
return scan_raw_string_content(scanner, lexer);
}
if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
return scan_raw_string_end(scanner, lexer);
}
if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
return process_float_literal(lexer);
}
return false;
}
+31
View File
@@ -0,0 +1,31 @@
{
"grammars": [
{
"name": "rune",
"camelcase": "Rune",
"scope": "source.rune",
"path": ".",
"file-types": ["rn"],
"highlights": ["queries/highlights.scm"],
"tags": ["queries/tags.scm"],
"injection-regex": "rune"
}
],
"metadata": {
"version": "0.1.0",
"license": "MIT",
"description": "Rune grammar for tree-sitter",
"authors": [],
"links": {
"repository": "https://github.com/TODO/tree-sitter-rune"
}
},
"bindings": {
"c": true,
"go": true,
"node": true,
"python": true,
"rust": true,
"swift": false
}
}