chore: scaffold tree-sitter-rune project
This commit is contained in:
@@ -0,0 +1,15 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
indent_size = 2
|
||||
indent_style = space
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.scm]
|
||||
indent_size = 2
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
+40
@@ -0,0 +1,40 @@
|
||||
# Rust artifacts
|
||||
target/
|
||||
|
||||
# Node artifacts
|
||||
build/
|
||||
prebuilds/
|
||||
node_modules/
|
||||
|
||||
# Swift artifacts
|
||||
.build/
|
||||
|
||||
# Go artifacts
|
||||
_obj/
|
||||
|
||||
# Python artifacts
|
||||
.venv/
|
||||
dist/
|
||||
*.egg-info
|
||||
*.whl
|
||||
|
||||
# C artifacts
|
||||
*.a
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
*.dll
|
||||
*.pc
|
||||
|
||||
# Example dirs
|
||||
/examples/*/
|
||||
|
||||
# Grammar volatiles
|
||||
*.wasm
|
||||
*.obj
|
||||
*.o
|
||||
|
||||
# Archives
|
||||
*.tar.gz
|
||||
*.tgz
|
||||
*.zip
|
||||
+32
@@ -0,0 +1,32 @@
|
||||
[package]
name = "tree-sitter-rune"
version = "0.1.0"
description = "Rune grammar for tree-sitter"
authors = []
license = "MIT"
edition = "2021"
rust-version = "1.65"
keywords = ["parser", "tree-sitter", "rune"]
# Cargo only auto-detects a build script named `build.rs` at the package root.
# NOTE(review): this assumes the build script lives next to the Rust binding
# (bindings/rust/build.rs, matching `[lib] path` below) -- confirm the location.
# Without this key the C parser is never compiled and linking fails.
build = "bindings/rust/build.rs"
include = [
  "bindings/rust/*",
  "queries/*",
  "src/*",
  "grammar.js",
  "tree-sitter.json",
]
|
||||
|
||||
[lib]
|
||||
path = "bindings/rust/lib.rs"
|
||||
|
||||
[[example]]
|
||||
name = "parse"
|
||||
path = "bindings/rust/examples/parse.rs"
|
||||
|
||||
[dependencies]
|
||||
tree-sitter-language = "0.1"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.1"
|
||||
|
||||
[dev-dependencies]
|
||||
tree-sitter = "0.25"
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"target_name": "tree_sitter_rune_binding",
|
||||
"dependencies": [
|
||||
"<!(node -p \"require('node-addon-api').targets\"):node_addon_api_except",
|
||||
],
|
||||
"include_dirs": [
|
||||
"src",
|
||||
],
|
||||
"sources": [
|
||||
"bindings/node/binding.cc",
|
||||
"src/parser.c",
|
||||
"src/scanner.c",
|
||||
],
|
||||
"conditions": [
|
||||
["OS!='win'", {
|
||||
"cflags_c": [
|
||||
"-std=c11",
|
||||
],
|
||||
}, { # OS == "win"
|
||||
"cflags_c": [
|
||||
"/std:c11",
|
||||
"/utf-8",
|
||||
],
|
||||
}],
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,20 @@
|
||||
#include <napi.h>
|
||||
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
||||
extern "C" TSLanguage *tree_sitter_rune();
|
||||
|
||||
// "tree-sitter", "language" hashed with BLAKE2
|
||||
const napi_type_tag LANGUAGE_TYPE_TAG = {
|
||||
0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16
|
||||
};
|
||||
|
||||
// Module initialiser: populates `exports` with the grammar name and a
// type-tagged external handle wrapping the pointer returned by
// tree_sitter_rune(), then hands the same object back to the runtime.
Napi::Object Init(Napi::Env env, Napi::Object exports) {
  exports.Set("name", Napi::String::New(env, "rune"));

  // Wrap the raw language pointer and tag it with LANGUAGE_TYPE_TAG.
  auto language_handle = Napi::External<TSLanguage>::New(env, tree_sitter_rune());
  language_handle.TypeTag(&LANGUAGE_TYPE_TAG);
  exports.Set("language", language_handle);

  return exports;
}
|
||||
|
||||
NODE_API_MODULE(tree_sitter_rune_binding, Init)
|
||||
Vendored
+32
@@ -0,0 +1,32 @@
|
||||
/** Smallest description of a node kind: its `type` string and its `named` flag. */
type BaseNode = {
  named: boolean;
  type: string;
};
|
||||
|
||||
/**
 * Describes one child slot of a node: the node kinds it accepts (`types`)
 * plus the `multiple` and `required` flags.
 */
type ChildNode = {
  types: Array<BaseNode>;
  required: boolean;
  multiple: boolean;
};
|
||||
|
||||
/**
 * One entry of the node-type table. An entry either lists `subtypes`, or
 * lists its `fields` (keyed by field name) together with its `children`.
 */
type NodeInfo =
  | (BaseNode & {
      subtypes: Array<BaseNode>;
    })
  | (BaseNode & {
      fields: Record<string, ChildNode>;
      children: Array<ChildNode>;
    });
|
||||
|
||||
/**
 * Shape of the object exported by this binding: the grammar `name`, the
 * opaque native `language` handle, and the `nodeTypeInfo` table.
 */
type Language = {
  language: unknown;
  name: string;
  nodeTypeInfo: Array<NodeInfo>;
};
|
||||
|
||||
declare const language: Language;
|
||||
export = language;
|
||||
@@ -0,0 +1,29 @@
|
||||
const root = require("path").join(__dirname, "..", "..");
|
||||
|
||||
if (process.versions?.bun) {
|
||||
const fs = require("fs");
|
||||
const os = require("os");
|
||||
const path = require("path");
|
||||
|
||||
const bindingName = "tree_sitter_rune_binding";
|
||||
const platformFolder = `${os.platform()}-${os.arch()}`;
|
||||
const prebuildsDir = path.join(root, "prebuilds", platformFolder);
|
||||
|
||||
if (fs.existsSync(prebuildsDir)) {
|
||||
const files = fs.readdirSync(prebuildsDir);
|
||||
const nodeFile = files.find((f) => f.endsWith(".node"));
|
||||
if (nodeFile) {
|
||||
module.exports = require(path.join(prebuildsDir, nodeFile));
|
||||
}
|
||||
}
|
||||
|
||||
if (!module.exports) {
|
||||
module.exports = require(`${root}/build/Release/${bindingName}.node`);
|
||||
}
|
||||
} else {
|
||||
module.exports = require("node-gyp-build")(root);
|
||||
}
|
||||
|
||||
try {
|
||||
module.exports.nodeTypeInfo = require("../../src/node-types.json");
|
||||
} catch (_) {}
|
||||
@@ -0,0 +1,26 @@
|
||||
from importlib.resources import files
|
||||
|
||||
from ._binding import language
|
||||
|
||||
|
||||
def _get_query(name, filename):
    """Return the text of a bundled query file, caching it in module globals.

    Args:
        name: Module-level name under which the query text is cached.
        filename: File to read from the package's ``queries`` directory.

    Returns:
        The cached value if ``name`` is already a module global, otherwise
        the freshly read text of ``queries/<filename>``.
    """
    cache = globals()
    try:
        return cache[name]
    except KeyError:
        pass

    resource = files(__name__).joinpath("queries").joinpath(filename)
    text = resource.read_text()
    cache[name] = text
    return text
|
||||
|
||||
|
||||
def __getattr__(name):
    """Module-level attribute hook that resolves the query constants lazily.

    ``HIGHLIGHTS_QUERY`` and ``TAGS_QUERY`` are loaded through ``_get_query``;
    any other name raises ``AttributeError``.
    """
    query_files = {
        "HIGHLIGHTS_QUERY": "highlights.scm",
        "TAGS_QUERY": "tags.scm",
    }
    filename = query_files.get(name)
    if filename is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _get_query(name, filename)
|
||||
|
||||
|
||||
def __dir__():
    """List the module's globals plus the lazily loaded query names.

    Once a query has been loaded it is stored in ``globals()``, so the two
    query names are de-duplicated (order preserved) to avoid listing a cached
    query twice.
    """
    names = [*globals(), "HIGHLIGHTS_QUERY", "TAGS_QUERY"]
    return list(dict.fromkeys(names))
|
||||
|
||||
|
||||
__all__ = ["language", "HIGHLIGHTS_QUERY", "TAGS_QUERY"]
|
||||
@@ -0,0 +1,19 @@
|
||||
fn main() {
|
||||
let src_dir = std::path::Path::new("src");
|
||||
|
||||
let mut c_config = cc::Build::new();
|
||||
c_config.std("c11").include(src_dir);
|
||||
|
||||
#[cfg(target_env = "msvc")]
|
||||
c_config.flag("-utf-8");
|
||||
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
|
||||
let scanner_path = src_dir.join("scanner.c");
|
||||
c_config.file(&scanner_path);
|
||||
println!("cargo:rerun-if-changed={}", scanner_path.to_str().unwrap());
|
||||
|
||||
c_config.compile("tree-sitter-rune");
|
||||
}
|
||||
@@ -0,0 +1,30 @@
|
||||
use tree_sitter_language::LanguageFn;
|
||||
|
||||
extern "C" {
|
||||
fn tree_sitter_rune() -> *const ();
|
||||
}
|
||||
|
||||
/// The tree-sitter [`LanguageFn`] for this grammar.
|
||||
// SAFETY: NOTE(review): presumably sound because `tree_sitter_rune` is the language function of the Tree-sitter-generated parser linked into this crate -- confirm the symbol name matches the generated parser.
pub static LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_rune) };
|
||||
|
||||
/// The content of the [`node-types.json`] file for this grammar.
|
||||
///
|
||||
/// [`node-types.json`]: https://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types
|
||||
pub const NODE_TYPES: &str = include_str!("../../src/node-types.json");
|
||||
|
||||
/// The syntax highlighting query for this language.
|
||||
pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm");
|
||||
|
||||
/// The tagging query for this language.
|
||||
pub const TAGS_QUERY: &str = include_str!("../../queries/tags.scm");
|
||||
|
||||
#[cfg(test)]
mod tests {
    /// Smoke test: a fresh parser must accept this crate's language.
    #[test]
    fn test_can_load_grammar() {
        let language: tree_sitter::Language = super::LANGUAGE.into();
        let mut parser = tree_sitter::Parser::new();
        let outcome = parser.set_language(&language);
        outcome.expect("Error loading Rune grammar");
    }
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,252 @@
|
||||
# tree-sitter-rune Design Spec
|
||||
|
||||
## Overview
|
||||
|
||||
A tree-sitter grammar for the [Rune](https://rune-rs.github.io/) programming language, targeting integration with nvim-treesitter for syntax highlighting, indentation, code folding, and scope tracking in Neovim.
|
||||
|
||||
Rune is a dynamically-typed, Rust-inspired scripting language designed for embedding in Rust applications. It uses the `.rn` file extension.
|
||||
|
||||
## Approach
|
||||
|
||||
Fork [tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) and modify:
|
||||
|
||||
- **Remove** ~30-40% of rules related to Rust's type system, ownership model, and static typing
|
||||
- **Add** Rune-specific constructs (template literals, generators, select blocks, dynamic typing)
|
||||
- **Adapt** shared constructs (functions, control flow, patterns) to Rune's untyped signatures
|
||||
|
||||
This leverages tree-sitter-rust's battle-tested expression parsing, operator precedence, and string handling while producing a clean grammar that maps 1:1 to Rune's actual syntax.
|
||||
|
||||
## Repository
|
||||
|
||||
- **Location:** `~/ssd/git/active/tree-sitter-rune`
|
||||
- **License:** MIT
|
||||
- **Target:** nvim-treesitter submission, npm and crates.io publishing
|
||||
|
||||
## Project Structure
|
||||
|
||||
```
|
||||
tree-sitter-rune/
|
||||
├── grammar.js # Grammar definition
|
||||
├── package.json # Node project config with tree-sitter field
|
||||
├── Cargo.toml # Rust bindings
|
||||
├── binding.gyp # Node native addon build
|
||||
├── bindings/ # Language bindings (node, rust)
|
||||
├── src/ # Parser sources: generated output plus the hand-written external scanner
|
||||
│ ├── parser.c
|
||||
│ ├── scanner.c # External scanner for template literals
|
||||
│ └── ...
|
||||
├── queries/
|
||||
│ ├── highlights.scm # Syntax highlighting captures
|
||||
│ ├── indents.scm # Auto-indentation rules
|
||||
│ ├── folds.scm # Code folding regions
|
||||
│ └── locals.scm # Scope/variable tracking
|
||||
├── test/
|
||||
│ └── corpus/ # Test cases (input -> expected tree)
|
||||
│ ├── declarations.txt
|
||||
│ ├── expressions.txt
|
||||
│ ├── literals.txt
|
||||
│ ├── patterns.txt
|
||||
│ ├── control_flow.txt
|
||||
│ ├── async.txt
|
||||
│ ├── generators.txt
|
||||
│ └── objects.txt
|
||||
├── examples/ # Real .rn files for parse validation
|
||||
├── LICENSE
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## Grammar: Removals from tree-sitter-rust
|
||||
|
||||
These Rust constructs do not exist in Rune and are removed entirely:
|
||||
|
||||
| Construct | Reason |
|
||||
|---|---|
|
||||
| Lifetime annotations (`'a`, `'static`) | No borrow checker |
|
||||
| Type annotations on params/returns | Dynamically typed |
|
||||
| Generic type parameters (`<T, U>`) | No generics |
|
||||
| Trait definitions and `impl Trait` | No trait system |
|
||||
| `impl` blocks | No type-level methods |
|
||||
| Borrow/reference syntax (`&`, `&mut`) | No ownership model |
|
||||
| `where` clauses | No type constraints |
|
||||
| `unsafe` blocks | No unsafe |
|
||||
| `extern` blocks / FFI | Embedding is Rust-side |
|
||||
| `const` / `static` items | Not in Rune |
|
||||
| Type aliases (`type Foo = ...`) | No type system |
|
||||
| Union types | Not in Rune |
|
||||
| `dyn` / `Box` / smart pointer syntax | Runtime-managed |
|
||||
| Turbofish (`::<Type>`) | No generics |
|
||||
|
||||
## Grammar: Additions for Rune
|
||||
|
||||
| Construct | Details |
|
||||
|---|---|
|
||||
| Template literals | Backtick strings with `${expr}` interpolation. Requires an external scanner for nested balanced expressions. Reference: tree-sitter-javascript's template literal scanner. |
|
||||
| `select` blocks | `select { branch => expr, ... }` for async concurrency |
|
||||
| `yield` expressions | `yield value` for generators |
|
||||
| Untyped function params | `fn foo(a, b)` — parameter names only, no type annotations |
|
||||
| Untyped struct fields | `struct Foo { x, y }` — field names only |
|
||||
| Untyped enum variants | `enum E { Ok(value), Err(msg) }` — positional names, no types |
|
||||
| Async closures | `async \|\| { ... }` and `async \|x\| { ... }` |
|
||||
| Object literals | `#{ key: value }` syntax |
|
||||
| `is` / `is not` operators | Runtime type checking |
|
||||
| Byte literals | `b'x'` and `b"string"` |
|
||||
|
||||
## Grammar: Shared Constructs (Keep, Adapt)
|
||||
|
||||
These exist in both Rust and Rune with minor differences:
|
||||
|
||||
- **Functions:** Keep `fn`, `pub fn`, `async fn`. Remove return type annotations.
|
||||
- **Control flow:** `if`/`else`, `match`, `loop`, `while`, `for`, `break`, `continue`, `return` — identical syntax.
|
||||
- **Pattern matching:** Literal, tuple, struct, enum, wildcard, rest (`..`), guard clauses — same as Rust minus type patterns.
|
||||
- **Closures:** `|args| expr` — same syntax, no type annotations on params.
|
||||
- **Modules:** `mod`, `use`, `self`, `crate`, `super` — same as Rust.
|
||||
- **Visibility:** `pub`, `pub(crate)`, `pub(super)`, `pub(self)` — same as Rust.
|
||||
- **Operators:** Arithmetic, comparison, logical, assignment — same as Rust plus `is`/`is not`.
|
||||
- **Comments:** `//` line and `/* */` block — same as Rust.
|
||||
- **String literals:** Double-quoted strings with escape sequences — same as Rust.
|
||||
|
||||
## External Scanner
|
||||
|
||||
An external scanner (`src/scanner.c`) is needed for template literal parsing. The scanner must:
|
||||
|
||||
1. Recognize backtick (`` ` ``) as the start/end of a template literal
|
||||
2. Track `${` as the start of an interpolation
|
||||
3. Handle nested braces within interpolations (balanced `{}` counting)
|
||||
4. Return to template literal content after `}` closes the interpolation
|
||||
|
||||
Reference implementation: [tree-sitter-javascript's scanner.c](https://github.com/tree-sitter/tree-sitter-javascript/blob/master/src/scanner.c) handles the same construct.
|
||||
|
||||
## Query Files
|
||||
|
||||
### highlights.scm
|
||||
|
||||
Key capture groups:
|
||||
|
||||
- `@keyword` — `fn`, `let`, `pub`, `use`, `mod`, `if`, `else`, `match`, `loop`, `while`, `for`, `return`, `async`, `await`, `yield`, `select`, `break`, `continue`, `in`, `is`
|
||||
- `@function` — function definitions and calls
|
||||
- `@function.method` — method calls (chained `.method()`)
|
||||
- `@string` — regular strings and template literal content
|
||||
- `@string.special` — `${...}` interpolation delimiters
|
||||
- `@variable` — identifiers
|
||||
- `@variable.parameter` — function parameters
|
||||
- `@operator` — binary/unary operators
|
||||
- `@type` — struct/enum names (PascalCase convention)
|
||||
- `@module` — module paths
|
||||
- `@comment` — line and block comments
|
||||
- `@punctuation.bracket` — `()`, `[]`, `{}`
|
||||
- `@punctuation.delimiter` — `,`, `;`, `::`
|
||||
- `@constant.builtin` — `true`, `false`, `None`
|
||||
- `@number` — integer and float literals
|
||||
|
||||
### indents.scm
|
||||
|
||||
Block-based indentation for `{}` bodies: functions, control flow, match arms, struct/enum definitions, closures, block expressions.
|
||||
|
||||
### folds.scm
|
||||
|
||||
Foldable regions: function bodies, struct/enum bodies, match blocks, block expressions, multi-line comment blocks.
|
||||
|
||||
### locals.scm
|
||||
|
||||
Scope definitions: function bodies, block expressions, closures, for/while/loop bodies.
|
||||
Variable definitions: `let` bindings, function parameters, for-loop variables.
|
||||
References: identifier usage.
|
||||
|
||||
## Formatter Integration
|
||||
|
||||
No custom formatter — Rune ships `rune fmt` since v0.13. Document integration with conform.nvim:
|
||||
|
||||
```lua
|
||||
require("conform").setup({
|
||||
formatters_by_ft = {
|
||||
rune = { "rune_fmt" },
|
||||
},
|
||||
formatters = {
|
||||
rune_fmt = {
|
||||
command = "rune",
|
||||
args = { "fmt", "$FILENAME" },
|
||||
stdin = false,
|
||||
},
|
||||
},
|
||||
})
|
||||
```
|
||||
|
||||
This snippet is documented in the README only; the grammar repo ships no formatter or editor configuration of its own.
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Test Corpus
|
||||
|
||||
Each `.txt` file in `test/corpus/` covers a category of syntax using tree-sitter's standard format:
|
||||
|
||||
```
|
||||
==================
|
||||
Test name
|
||||
==================
|
||||
|
||||
source code here
|
||||
|
||||
---
|
||||
|
||||
(expected_syntax_tree)
|
||||
```
|
||||
|
||||
**Categories:**
|
||||
|
||||
- `declarations.txt` — fn, struct, enum, mod, use, visibility modifiers
|
||||
- `expressions.txt` — binary, unary, call, method chain, field access, index, range
|
||||
- `literals.txt` — strings, template literals, numbers, booleans, byte literals, object literals
|
||||
- `control_flow.txt` — if/else, match with guards, loops (loop/while/for), break/continue/return
|
||||
- `patterns.txt` — match patterns (literal, tuple, struct, enum, wildcard, rest, or-patterns)
|
||||
- `async.txt` — async fn, .await, select blocks, async closures
|
||||
- `generators.txt` — yield expressions, generator functions, streams
|
||||
- `objects.txt` — object literals (`#{}`), vector literals
|
||||
|
||||
### Real-World Validation
|
||||
|
||||
Parse existing `.rn` files to catch gaps:
|
||||
- `~/.config/owlry/plugins/hyprshutdown/main.rn`
|
||||
- Future owlry plugin files as they're written
|
||||
|
||||
### Validation Criteria
|
||||
|
||||
- All test corpus files pass `tree-sitter test`
|
||||
- Real-world `.rn` files parse without errors
|
||||
- Highlight queries produce sensible coloring in Neovim
|
||||
- No parser crashes on malformed input (graceful error recovery)
|
||||
|
||||
## Development Phases
|
||||
|
||||
### Phase 1: Core Grammar
|
||||
|
||||
Fork tree-sitter-rust, strip type system rules, adapt function/struct/enum definitions to Rune's untyped signatures. Target: parse `main.rn` successfully.
|
||||
|
||||
### Phase 2: Rune-Specific Constructs
|
||||
|
||||
Add template literals (external scanner), select blocks, yield expressions, async closures, object literals, `is`/`is not`. Build test corpus alongside.
|
||||
|
||||
### Phase 3: Query Files
|
||||
|
||||
Write `highlights.scm`, `indents.scm`, `folds.scm`, `locals.scm`. Validate in Neovim with real files.
|
||||
|
||||
### Phase 4: Polish & Publish
|
||||
|
||||
- Complete test corpus coverage
|
||||
- README with installation and usage instructions
|
||||
- MIT LICENSE
|
||||
- Submit PR to nvim-treesitter to register parser
|
||||
- Publish to npm (`tree-sitter-rune`) and crates.io
|
||||
|
||||
## nvim-treesitter Submission Requirements
|
||||
|
||||
- Grammar hosted on public Git repository
|
||||
- `highlights.scm` included at minimum
|
||||
- Parser handles real-world code without crashing
|
||||
- `package.json` contains `tree-sitter` field with grammar metadata
|
||||
- Maintained and responsive to issues
|
||||
|
||||
## Dependencies
|
||||
|
||||
- **Build time:** Node.js (for `tree-sitter generate`), tree-sitter CLI, C compiler
|
||||
- **Runtime:** None (compiled to a `.so` shared library)
|
||||
- **Formatter:** `rune` CLI (installed separately by the user)
|
||||
+1695
File diff suppressed because it is too large
Load Diff
Generated
+63
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"name": "tree-sitter-rune",
|
||||
"version": "0.1.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "tree-sitter-rune",
|
||||
"version": "0.1.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"node-addon-api": "^8.2.2",
|
||||
"node-gyp-build": "^4.8.4"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tree-sitter-cli": "^0.26.7"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tree-sitter": "^0.25.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"tree-sitter": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/node-addon-api": {
|
||||
"version": "8.7.0",
|
||||
"resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-8.7.0.tgz",
|
||||
"integrity": "sha512-9MdFxmkKaOYVTV+XVRG8ArDwwQ77XIgIPyKASB1k3JPq3M8fGQQQE3YpMOrKm6g//Ktx8ivZr8xo1Qmtqub+GA==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": "^18 || ^20 || >= 21"
|
||||
}
|
||||
},
|
||||
"node_modules/node-gyp-build": {
|
||||
"version": "4.8.4",
|
||||
"resolved": "https://registry.npmjs.org/node-gyp-build/-/node-gyp-build-4.8.4.tgz",
|
||||
"integrity": "sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==",
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"node-gyp-build": "bin.js",
|
||||
"node-gyp-build-optional": "optional.js",
|
||||
"node-gyp-build-test": "build-test.js"
|
||||
}
|
||||
},
|
||||
"node_modules/tree-sitter-cli": {
|
||||
"version": "0.26.7",
|
||||
"resolved": "https://registry.npmjs.org/tree-sitter-cli/-/tree-sitter-cli-0.26.7.tgz",
|
||||
"integrity": "sha512-fOg/DokJr/gW7suy9IypT1MQon28+JxOmtjudrT04rHRyqVJzrvakrojwyU8r0U6UNRsZKilb8VIhyarv2XUkQ==",
|
||||
"dev": true,
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"bin": {
|
||||
"tree-sitter": "cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=12.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
{
|
||||
"name": "tree-sitter-rune",
|
||||
"version": "0.1.0",
|
||||
"description": "Rune grammar for tree-sitter",
|
||||
"main": "bindings/node",
|
||||
"types": "bindings/node",
|
||||
"keywords": [
|
||||
"parser",
|
||||
"tree-sitter",
|
||||
"rune"
|
||||
],
|
||||
"files": [
|
||||
"grammar.js",
|
||||
"binding.gyp",
|
||||
"prebuilds/",
|
||||
"queries/",
|
||||
"src/",
|
||||
"bindings/node"
|
||||
],
|
||||
"dependencies": {
|
||||
"node-addon-api": "^8.2.2",
|
||||
"node-gyp-build": "^4.8.4"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"tree-sitter": "^0.25.0"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"tree-sitter": {
|
||||
"optional": true
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"tree-sitter-cli": "^0.26.7"
|
||||
},
|
||||
"scripts": {
|
||||
"install": "node-gyp-build",
|
||||
"test": "node --test bindings/node/test",
|
||||
"parse": "tree-sitter parse",
|
||||
"generate": "tree-sitter generate"
|
||||
},
|
||||
"tree-sitter": [
|
||||
{
|
||||
"scope": "source.rune",
|
||||
"file-types": [
|
||||
"rn"
|
||||
],
|
||||
"highlights": "queries/highlights.scm",
|
||||
"tags": "queries/tags.scm",
|
||||
"injection-regex": "rune"
|
||||
}
|
||||
],
|
||||
"license": "MIT"
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
; Highlights for Rune
|
||||
; (populated in Task 10)
|
||||
@@ -0,0 +1,2 @@
|
||||
; Tags for Rune
|
||||
; (populated in Task 11)
|
||||
+393
@@ -0,0 +1,393 @@
|
||||
#include "tree_sitter/alloc.h"
|
||||
#include "tree_sitter/parser.h"
|
||||
|
||||
#include <wctype.h>
|
||||
|
||||
enum TokenType {
|
||||
STRING_CONTENT,
|
||||
RAW_STRING_LITERAL_START,
|
||||
RAW_STRING_LITERAL_CONTENT,
|
||||
RAW_STRING_LITERAL_END,
|
||||
FLOAT_LITERAL,
|
||||
BLOCK_OUTER_DOC_MARKER,
|
||||
BLOCK_INNER_DOC_MARKER,
|
||||
BLOCK_COMMENT_CONTENT,
|
||||
LINE_DOC_CONTENT,
|
||||
ERROR_SENTINEL
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
uint8_t opening_hash_count;
|
||||
} Scanner;
|
||||
|
||||
void *tree_sitter_rust_external_scanner_create() { return ts_calloc(1, sizeof(Scanner)); }
|
||||
|
||||
void tree_sitter_rust_external_scanner_destroy(void *payload) { ts_free((Scanner *)payload); }
|
||||
|
||||
unsigned tree_sitter_rust_external_scanner_serialize(void *payload, char *buffer) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
buffer[0] = (char)scanner->opening_hash_count;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void tree_sitter_rust_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
scanner->opening_hash_count = 0;
|
||||
if (length == 1) {
|
||||
Scanner *scanner = (Scanner *)payload;
|
||||
scanner->opening_hash_count = buffer[0];
|
||||
}
|
||||
}
|
||||
|
||||
/* True for an underscore or any character iswdigit() accepts. */
static inline bool is_num_char(int32_t c) {
    if (c == '_') {
        return true;
    }
    return iswdigit(c) != 0;
}
|
||||
|
||||
/* Moves the lexer forward one character, passing `false` as the skip flag. */
static inline void advance(TSLexer *lexer) {
    const bool skip_char = false;
    lexer->advance(lexer, skip_char);
}
|
||||
|
||||
/* Moves the lexer forward one character, passing `true` as the skip flag. */
static inline void skip(TSLexer *lexer) {
    const bool skip_char = true;
    lexer->advance(lexer, skip_char);
}
|
||||
|
||||
/*
 * Consumes characters up to (not including) the next double quote or
 * backslash and reports them as STRING_CONTENT.
 *
 * Returns false when end of input is hit first, or when nothing was consumed.
 */
static inline bool process_string(TSLexer *lexer) {
    bool consumed_any = false;

    while (lexer->lookahead != '\"' && lexer->lookahead != '\\') {
        if (lexer->eof(lexer)) {
            return false;
        }
        advance(lexer);
        consumed_any = true;
    }

    lexer->result_symbol = STRING_CONTENT;
    lexer->mark_end(lexer);
    return consumed_any;
}
|
||||
|
||||
/*
 * Recognises the opening of a raw string: an optional `b` or `c` prefix, an
 * `r`, any number of `#`, then a double quote. On success the number of `#`
 * characters is stored in the scanner for the matching close.
 */
static inline bool scan_raw_string_start(Scanner *scanner, TSLexer *lexer) {
    /* Optional one-character prefix. */
    switch (lexer->lookahead) {
        case 'b':
        case 'c':
            advance(lexer);
            break;
        default:
            break;
    }

    if (lexer->lookahead != 'r') {
        return false;
    }
    advance(lexer);

    uint8_t hashes = 0;
    for (; lexer->lookahead == '#'; hashes++) {
        advance(lexer);
    }

    if (lexer->lookahead != '"') {
        return false;
    }
    advance(lexer);

    /* Only commit the count once the whole opener has been seen. */
    scanner->opening_hash_count = hashes;
    lexer->result_symbol = RAW_STRING_LITERAL_START;
    return true;
}
|
||||
|
||||
/*
 * Consumes raw-string content up to the closing delimiter: a double quote
 * followed by as many `#` as the opener had. The token end is marked just
 * before that quote. Returns false if end of input is reached first.
 */
static inline bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
    const unsigned required = scanner->opening_hash_count;

    while (!lexer->eof(lexer)) {
        if (lexer->lookahead != '"') {
            advance(lexer);
            continue;
        }

        /* Candidate terminator: remember where the content would end. */
        lexer->mark_end(lexer);
        advance(lexer);

        unsigned seen = 0;
        for (; seen < required && lexer->lookahead == '#'; seen++) {
            advance(lexer);
        }

        if (seen == required) {
            lexer->result_symbol = RAW_STRING_LITERAL_CONTENT;
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/*
 * Consumes the closing delimiter of a raw string: one character (the quote
 * the caller has already checked for) plus `opening_hash_count` more.
 */
static inline bool scan_raw_string_end(Scanner *scanner, TSLexer *lexer) {
    unsigned remaining = 1u + scanner->opening_hash_count;
    while (remaining > 0) {
        advance(lexer);
        remaining--;
    }

    lexer->result_symbol = RAW_STRING_LITERAL_END;
    return true;
}
|
||||
|
||||
/*
 * Scans a float literal: digits, an optional fraction, an optional exponent
 * and an optional `u`/`i`/`f` + digits suffix. The token end is re-marked
 * after each part that is accepted, so a trailing partial part is left out.
 *
 * Returns false when the text turns out not to be a float (no fraction and
 * no exponent, or a dot followed by a letter or another dot).
 */
static inline bool process_float_literal(TSLexer *lexer) {
    lexer->result_symbol = FLOAT_LITERAL;

    /* Leading run: first character unconditionally, then digits/underscores. */
    do {
        advance(lexer);
    } while (is_num_char(lexer->lookahead));

    bool saw_fraction = false;
    bool saw_exponent = false;

    if (lexer->lookahead == '.') {
        saw_fraction = true;
        advance(lexer);

        /*
         * A letter after the dot, as in 1.max(2), means an integer followed
         * by a method call. A second dot is rejected as well (presumably a
         * range expression).
         */
        if (iswalpha(lexer->lookahead) || lexer->lookahead == '.') {
            return false;
        }

        while (is_num_char(lexer->lookahead)) {
            advance(lexer);
        }
    }

    lexer->mark_end(lexer);

    if (lexer->lookahead == 'e' || lexer->lookahead == 'E') {
        saw_exponent = true;
        advance(lexer);

        if (lexer->lookahead == '+' || lexer->lookahead == '-') {
            advance(lexer);
        }

        /* No exponent digits: the token ends at the mark placed before 'e'. */
        if (!is_num_char(lexer->lookahead)) {
            return true;
        }

        do {
            advance(lexer);
        } while (is_num_char(lexer->lookahead));

        lexer->mark_end(lexer);
    }

    if (!(saw_exponent || saw_fraction)) {
        return false;
    }

    /* Optional suffix: one of u / i / f followed by at least one digit. */
    switch (lexer->lookahead) {
        case 'u':
        case 'i':
        case 'f':
            break;
        default:
            return true;
    }
    advance(lexer);

    if (!iswdigit(lexer->lookahead)) {
        return true;
    }
    do {
        advance(lexer);
    } while (iswdigit(lexer->lookahead));

    lexer->mark_end(lexer);
    return true;
}
|
||||
|
||||
/*
 * Consumes the rest of the current line as LINE_DOC_CONTENT. The terminating
 * newline, if any, is part of the token (line endings are useful for markdown
 * injection). Always succeeds, including at end of input.
 */
static inline bool process_line_doc_content(TSLexer *lexer) {
    lexer->result_symbol = LINE_DOC_CONTENT;

    while (!lexer->eof(lexer)) {
        const bool at_newline = lexer->lookahead == '\n';
        advance(lexer);
        if (at_newline) {
            break;
        }
    }

    return true;
}
|
||||
|
||||
typedef enum {
|
||||
LeftForwardSlash,
|
||||
LeftAsterisk,
|
||||
Continuing,
|
||||
} BlockCommentState;
|
||||
|
||||
typedef struct {
|
||||
BlockCommentState state;
|
||||
unsigned nestingDepth;
|
||||
} BlockCommentProcessing;
|
||||
|
||||
/*
 * Transition taken when the previous character was '/': a following '*'
 * opens a nested comment. The state always returns to Continuing.
 */
static inline void process_left_forward_slash(BlockCommentProcessing *processing, char current) {
    processing->nestingDepth += (current == '*') ? 1u : 0u;
    processing->state = Continuing;
}
|
||||
|
||||
/*
 * Transition taken when the previous character was '*'.
 *   '*'  -> mark the token end here and stay in LeftAsterisk
 *   '/'  -> one nesting level closes, back to Continuing
 *   else -> back to Continuing
 */
static inline void process_left_asterisk(BlockCommentProcessing *processing, char current, TSLexer *lexer) {
    switch (current) {
        case '*':
            lexer->mark_end(lexer);
            processing->state = LeftAsterisk;
            return;
        case '/':
            processing->nestingDepth -= 1;
            break;
        default:
            break;
    }

    processing->state = Continuing;
}
|
||||
|
||||
/*
 * Transition taken from the Continuing state: '/' and '*' move to their
 * "left" states; any other character leaves the state untouched.
 */
static inline void process_continuing(BlockCommentProcessing *processing, char current) {
    if (current == '/') {
        processing->state = LeftForwardSlash;
    } else if (current == '*') {
        processing->state = LeftAsterisk;
    }
}
|
||||
|
||||
/*
 * Scans the inside of a block comment. Depending on which symbols are valid
 * it produces an inner doc marker ('!'), an outer doc marker ('*' not
 * followed by '*' or '/'), or the comment content up to the point where the
 * nesting depth returns to zero.
 *
 * Exactly one character is consumed before content scanning starts, whichever
 * branch ran, because only that one character is remembered in `first`.
 */
static inline bool process_block_comment(TSLexer *lexer, const bool *valid_symbols) {
    char first = (char)lexer->lookahead;

    if (first == '!' && valid_symbols[BLOCK_INNER_DOC_MARKER]) {
        lexer->result_symbol = BLOCK_INNER_DOC_MARKER;
        advance(lexer);
        return true;
    }

    if (first == '*' && valid_symbols[BLOCK_OUTER_DOC_MARKER]) {
        advance(lexer);
        lexer->mark_end(lexer);

        /* A '/' right after the '*' means the block comment is empty. */
        if (lexer->lookahead == '/') {
            return false;
        }

        /* Outer doc markers have exactly two '*'; three or more is plain content. */
        if (lexer->lookahead != '*') {
            lexer->result_symbol = BLOCK_OUTER_DOC_MARKER;
            return true;
        }
    } else {
        advance(lexer);
    }

    if (!valid_symbols[BLOCK_COMMENT_CONTENT]) {
        return false;
    }

    /* Seed the state machine from the character consumed above. */
    BlockCommentProcessing processing = {Continuing, 1};
    if (first == '*') {
        /* Happens for an empty doc block comment such as an inner-doc marker */
        /* immediately followed by the terminator: there is no content. */
        if (lexer->lookahead == '/') {
            return false;
        }
        processing.state = LeftAsterisk;
    } else if (first == '/') {
        processing.state = LeftForwardSlash;
    }

    /*
     * Reaching end of input with the comment still open is reported as
     * success rather than an error, so that code before an unterminated
     * block comment can still be highlighted.
     */
    while (!lexer->eof(lexer) && processing.nestingDepth != 0) {
        /* `first` now tracks the character currently under the lexer. */
        first = (char)lexer->lookahead;

        switch (processing.state) {
            case LeftForwardSlash:
                process_left_forward_slash(&processing, first);
                break;
            case LeftAsterisk:
                process_left_asterisk(&processing, first, lexer);
                break;
            case Continuing:
                lexer->mark_end(lexer);
                process_continuing(&processing, first);
                break;
            default:
                break;
        }

        advance(lexer);

        if (first == '/' && processing.nestingDepth != 0) {
            lexer->mark_end(lexer);
        }
    }

    lexer->result_symbol = BLOCK_COMMENT_CONTENT;
    return true;
}
|
||||
|
||||
/*
 * Entry point of the external scanner: picks the sub-scanner to run from the
 * set of currently valid symbols and the next character.
 * NOTE(review): symbol uses the `rust` prefix; confirm it matches the
 * generated parser for this grammar.
 */
bool tree_sitter_rust_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    /*
     * During error recovery Tree-sitter calls the scanner with every token
     * marked valid. ERROR_SENTINEL is an otherwise unused token placed at the
     * end of the externals list, so seeing it valid signals that mode. The
     * scanner cannot help recovery, so it simply fails.
     * See: https://tree-sitter.github.io/tree-sitter/creating-parsers#external-scanners
     */
    if (valid_symbols[ERROR_SENTINEL]) {
        return false;
    }

    Scanner *scanner = (Scanner *)payload;

    const bool block_comment_expected = valid_symbols[BLOCK_COMMENT_CONTENT] ||
                                        valid_symbols[BLOCK_INNER_DOC_MARKER] ||
                                        valid_symbols[BLOCK_OUTER_DOC_MARKER];
    if (block_comment_expected) {
        return process_block_comment(lexer, valid_symbols);
    }

    if (valid_symbols[STRING_CONTENT] && !valid_symbols[FLOAT_LITERAL]) {
        return process_string(lexer);
    }

    if (valid_symbols[LINE_DOC_CONTENT]) {
        return process_line_doc_content(lexer);
    }

    /* The remaining tokens may be preceded by whitespace, which is skipped. */
    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    if (valid_symbols[RAW_STRING_LITERAL_START] &&
        (lexer->lookahead == 'r' || lexer->lookahead == 'b' || lexer->lookahead == 'c')) {
        return scan_raw_string_start(scanner, lexer);
    } else if (valid_symbols[RAW_STRING_LITERAL_CONTENT]) {
        return scan_raw_string_content(scanner, lexer);
    } else if (valid_symbols[RAW_STRING_LITERAL_END] && lexer->lookahead == '"') {
        return scan_raw_string_end(scanner, lexer);
    } else if (valid_symbols[FLOAT_LITERAL] && iswdigit(lexer->lookahead)) {
        return process_float_literal(lexer);
    }

    return false;
}
|
||||
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"grammars": [
|
||||
{
|
||||
"name": "rune",
|
||||
"camelcase": "Rune",
|
||||
"scope": "source.rune",
|
||||
"path": ".",
|
||||
"file-types": ["rn"],
|
||||
"highlights": ["queries/highlights.scm"],
|
||||
"tags": ["queries/tags.scm"],
|
||||
"injection-regex": "rune"
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"version": "0.1.0",
|
||||
"license": "MIT",
|
||||
"description": "Rune grammar for tree-sitter",
|
||||
"authors": [],
|
||||
"links": {
|
||||
"repository": "https://github.com/TODO/tree-sitter-rune"
|
||||
}
|
||||
},
|
||||
"bindings": {
|
||||
"c": true,
|
||||
"go": true,
|
||||
"node": true,
|
||||
"python": true,
|
||||
"rust": true,
|
||||
"swift": false
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user