perf(filesearch): make file search opt-in and bound the fd walk

File search shelled out to `fd`/`locate` on every auto-mode keystroke,
walking $HOME and serialising behind a single daemon connection — observed
0.9–1.3s per keystroke, stalling every other provider's results. Gate it
behind explicit intent and bound the walk.

- DynamicProvider::runs_in_auto_mode() (default true); filesearch returns
  false, so it only runs when its `:file` prefix is active — never on a
  bare auto-mode query
- route `/` as a `:file` prefix alias in parse_query (identical ParsedQuery)
- drop the colon-less `file `/`find ` auto-triggers; `:file` / `/` cover it
  (also fixes `:file find me` searching for "me")
- bound fd: --max-depth, --exclude .git/node_modules/target/.cache,
  20 results, 750ms kill-timeout (spawn + drained stdout + reap)
- configurable providers.filesearch_roots (TOML + owlry.set; `~/` expanded,
  non-existent entries dropped); empty searches $HOME

Auto-mode typing drops from up to 1.3s to ~2–20ms; file search stays one
keystroke away.
This commit is contained in:
2026-06-01 16:55:31 +02:00
parent 5ae5bb8b87
commit ffa82bfed8
10 changed files with 417 additions and 66 deletions
+6
View File
@@ -202,6 +202,11 @@ pub struct ProvidersConfig {
/// Or a custom URL with a {query} placeholder
#[serde(default = "default_search_engine")]
pub search_engine: String,
/// Root directories for file search (`:file` / `/`). Each entry may use a
/// leading `~/` for the home directory. Empty (the default) searches
/// `$HOME`.
#[serde(default)]
pub filesearch_roots: Vec<String>,
}
impl Default for ProvidersConfig {
@@ -221,6 +226,7 @@ impl Default for ProvidersConfig {
frecency: true,
frecency_weight: 0.3,
search_engine: "duckduckgo".to_string(),
filesearch_roots: Vec::new(),
}
}
}
+39
View File
@@ -216,6 +216,18 @@ impl ProviderFilter {
}
}
// `/` is a shorthand alias for the `:file` prefix. `/foo` and `/ foo`
// both route to file search with term "foo"; a bare `/` is a partial
// prefix with an empty query. This produces an identical ParsedQuery to
// `:file foo`, so the daemon path is shared.
if let Some(rest) = trimmed.strip_prefix('/') {
return ParsedQuery {
prefix: Some(ProviderType::Plugin("filesearch".to_string())),
tag_filter: None,
query: rest.trim_start().to_string(),
};
}
// Core prefixes — each entry is tried as ":name " (full) and ":name" (partial)
let core_prefixes: &[(&str, ProviderType)] = &[
("app", ProviderType::Application),
@@ -483,6 +495,33 @@ mod tests {
assert_eq!(result.query, "5+3");
}
#[test]
fn slash_alias_routes_to_filesearch_like_file_prefix() {
    // Every slash form must resolve to the filesearch plugin prefix.
    let filesearch = Some(ProviderType::Plugin("filesearch".to_string()));

    // `/foo` and `:file foo` must parse identically, so file search is
    // reachable without typing the full prefix.
    let via_prefix = ProviderFilter::parse_query(":file foo");
    let via_slash = ProviderFilter::parse_query("/foo");
    assert_eq!(via_slash.prefix, via_prefix.prefix);
    assert_eq!(via_slash.query, via_prefix.query);
    assert_eq!(via_slash.prefix, filesearch);
    assert_eq!(via_slash.query, "foo");

    // Whitespace between the slash and the term is trimmed away.
    assert_eq!(ProviderFilter::parse_query("/ foo").query, "foo");

    // A lone `/` selects the prefix but carries no search term yet.
    let bare = ProviderFilter::parse_query("/");
    assert_eq!(bare.prefix, filesearch);
    assert_eq!(bare.query, "");
}
#[test]
fn parse_query_routes_unknown_prefix_to_plugin_type_id() {
// Locks in the dynamic-prefix-fallback path (filter.rs:319-347): a
+3
View File
@@ -145,6 +145,9 @@ fn apply_set(cfg: &mut LuaConfig, t: Table) -> mlua::Result<()> {
if let Some(v) = t.get::<Option<String>>("search_engine")? {
cfg.search_engine = Some(v);
}
if let Some(v) = t.get::<Option<Vec<String>>>("filesearch_roots")? {
cfg.filesearch_roots = Some(v);
}
// Record unknown keys (string-keyed, non-known) for `config validate`.
for pair in t.pairs::<mlua::Value, mlua::Value>() {
+20
View File
@@ -28,6 +28,7 @@ pub struct LuaConfig {
pub frecency: Option<bool>,
pub frecency_weight: Option<f64>,
pub search_engine: Option<String>,
pub filesearch_roots: Option<Vec<String>>,
/// Keys passed to `owlry.set` that we don't recognise. Surfaced by
/// `owlry config validate` (Phase 3.9). Not an error — forward-compat.
@@ -135,6 +136,7 @@ pub(crate) const KNOWN_SET_KEYS: &[&str] = &[
"frecency",
"frecency_weight",
"search_engine",
"filesearch_roots",
];
/// Known keys accepted by `owlry.theme { ... }`. Mirrors the fields of
@@ -211,6 +213,9 @@ impl LuaConfig {
if let Some(v) = &self.search_engine {
cfg.providers.search_engine = v.clone();
}
if let Some(v) = &self.filesearch_roots {
cfg.providers.filesearch_roots = v.clone();
}
// ── owlry.providers ────────────────────────────────────────────
if let Some(list) = &self.providers {
@@ -401,6 +406,21 @@ mod tests {
assert_eq!(cfg.providers.search_engine, "google");
}
#[test]
fn merge_overrides_filesearch_roots() {
    let expected = vec!["~/Documents".to_string(), "~/code".to_string()];

    // Precondition: the default config has no explicit roots.
    let mut cfg = Config::default();
    assert!(cfg.providers.filesearch_roots.is_empty());

    // A Lua-side value must replace the default verbatim (no tilde expansion
    // happens at merge time).
    let lc = LuaConfig {
        filesearch_roots: Some(expected.clone()),
        ..Default::default()
    };
    lc.merge_into(&mut cfg);

    assert_eq!(cfg.providers.filesearch_roots, expected);
}
#[test]
fn providers_list_enables_only_listed_ids() {
let mut cfg = Config::default();
+9
View File
@@ -354,6 +354,15 @@ fn emit_set_section(out: &mut String, cfg: &Config) {
if cfg.providers.search_engine != d.providers.search_engine {
entries.push(("search_engine", lua_string(&cfg.providers.search_engine)));
}
if cfg.providers.filesearch_roots != d.providers.filesearch_roots {
let items: Vec<String> = cfg
.providers
.filesearch_roots
.iter()
.map(|r| lua_string(r))
.collect();
entries.push(("filesearch_roots", format!("{{ {} }}", items.join(", "))));
}
if entries.is_empty() {
return;
+235 -65
View File
@@ -1,19 +1,31 @@
//! File search provider.
//!
//! Dynamic provider that searches for files using `fd` (preferred) or
//! `locate`. Triggered by:
//! - `/ name` / `/name` (slash prefix)
//! - `file name` / `find name` (word prefix)
//! `locate`. Because each query shells out and walks the filesystem, it is
//! **opt-in only** — it never contributes to bare auto-mode queries (see
//! [`DynamicProvider::runs_in_auto_mode`]). Triggered by:
//! - `:file name` / `:files name` / `:find name` (prefix)
//! - `/ name` / `/name` (slash alias, routed to the `:file` prefix by
//! [`crate::filter::ProviderFilter::parse_query`])
//!
//! External dependencies:
//! - `fd` (preferred) or `locate`
use super::{DynamicProvider, ItemSource, LaunchItem, ProviderType};
use std::io::Read;
use std::path::Path;
use std::process::Command;
use std::process::{Command, Stdio};
use std::time::{Duration, Instant};
const TYPE_ID: &str = "filesearch";
const MAX_RESULTS: usize = 20;
/// Cap recursion depth so a single query can't walk an unbounded tree.
const MAX_DEPTH: usize = 8;
/// Hard wall-clock cap per query — defends against slow/network mounts.
const QUERY_TIMEOUT: Duration = Duration::from_millis(750);
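/// Noise directories pruned from the walk. `fd` already skips hidden and
/// `.gitignore`d paths by default, so `.git` and `.cache` are belt-and-braces;
/// `node_modules` and `target` are the non-hidden offenders, pruned even when
/// no ignore file lists them.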
const EXCLUDES: &[&str] = &[".git", "node_modules", "target", ".cache"];
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SearchTool {
@@ -22,11 +34,13 @@ enum SearchTool {
None,
}
/// Dynamic file search provider — shells out to `fd` or `locate` per keystroke.
/// Dynamic file search provider — shells out to `fd` or `locate` on explicit
/// `:file` / `/` queries (never in auto mode).
pub(crate) struct FileSearchProvider {
search_tool: SearchTool,
// TODO(v2.x): plumb via constructor (search roots, extra flags).
home: String,
/// Resolved, existing search roots. Empty means "search `$HOME`".
roots: Vec<String>,
}
impl Default for FileSearchProvider {
@@ -36,14 +50,108 @@ impl Default for FileSearchProvider {
}
impl FileSearchProvider {
/// Construct with the default root (`$HOME`).
///
/// Shorthand for [`Self::with_roots`] with an empty root list.
pub fn new() -> Self {
Self::with_roots(Vec::new())
}
/// Construct with explicit search roots (from `providers.filesearch_roots`).
/// Entries may use a leading `~/`; non-directory entries are dropped. An
/// empty resolved set falls back to `$HOME`.
pub fn with_roots(roots: Vec<String>) -> Self {
let search_tool = Self::detect_search_tool();
// TODO(v2.x): plumb via constructor.
let home = dirs::home_dir()
.map(|p| p.to_string_lossy().to_string())
.unwrap_or_else(|| "/".to_string());
let roots = Self::resolve_roots(roots, &home);
Self { search_tool, home }
Self {
search_tool,
home,
roots,
}
}
/// Turn configured root strings into usable search roots.
///
/// Each entry is trimmed and tilde-expanded against `home`; entries that end
/// up empty or that do not name an existing directory are discarded. Input
/// order is preserved.
fn resolve_roots(roots: Vec<String>, home: &str) -> Vec<String> {
    let mut resolved = Vec::new();
    for raw in roots {
        let candidate = Self::expand_tilde(raw.trim(), home);
        if candidate.is_empty() || !Path::new(&candidate).is_dir() {
            continue;
        }
        resolved.push(candidate);
    }
    resolved
}
/// Expand a leading `~` in `path` against `home`.
///
/// - `"~"` becomes `home` verbatim.
/// - `"~/rest"` becomes `home` and `rest` joined by exactly one `/`.
/// - Anything else (absolute paths, relative paths, `~user/...`) is returned
///   unchanged.
fn expand_tilde(path: &str, home: &str) -> String {
    if path == "~" {
        return home.to_string();
    }
    match path.strip_prefix("~/") {
        // Drop trailing separators from `home` first, so a home of "/" or
        // "/home/u/" does not produce a doubled slash ("//rest").
        Some(rest) => format!("{}/{}", home.trim_end_matches('/'), rest),
        None => path.to_string(),
    }
}
/// Assemble the `fd` argument vector with bounds and roots. Pure so the
/// bounding can be unit-tested without spawning `fd`.
///
/// Layout: result cap, depth cap, file+directory type filters, one
/// `--exclude` per entry in `EXCLUDES`, then `--`, the pattern, and finally
/// each root as a positional search path.
///
/// The `--` terminator is required because `pattern` is user-typed: without
/// it a term starting with `-` would be parsed by `fd` as an option rather
/// than searched for.
fn fd_args(pattern: &str, roots: &[String]) -> Vec<String> {
    let mut args = vec![
        "--max-results".to_string(),
        MAX_RESULTS.to_string(),
        "--max-depth".to_string(),
        MAX_DEPTH.to_string(),
        "--type".to_string(),
        "f".to_string(),
        "--type".to_string(),
        "d".to_string(),
    ];
    for ex in EXCLUDES {
        args.push("--exclude".to_string());
        args.push((*ex).to_string());
    }
    // Everything after this is positional: pattern first, then search paths.
    args.push("--".to_string());
    args.push(pattern.to_string());
    args.extend(roots.iter().cloned());
    args
}
/// Run `cmd` with a wall-clock bound and return what it wrote to stdout.
///
/// stdout is drained on a helper thread so a full pipe can't deadlock the
/// child; stderr is discarded. The child is polled until it exits or
/// `timeout` elapses; past the deadline (or if polling itself fails) it is
/// killed and reaped before returning.
///
/// Output is decoded lossily: bytes that are not valid UTF-8 (file names are
/// arbitrary bytes on Unix) become U+FFFD instead of discarding the whole
/// result set.
///
/// Returns `None` on spawn failure or timeout. The child's exit status is
/// not inspected — a child that exits on its own always yields `Some`.
fn run_bounded(mut cmd: Command, timeout: Duration) -> Option<String> {
    /// Granularity of the exit poll; also the worst-case added latency.
    const POLL_INTERVAL: Duration = Duration::from_millis(15);

    let mut child = cmd
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .ok()?;
    let mut pipe = child.stdout.take()?;
    let drain = std::thread::spawn(move || {
        let mut raw = Vec::new();
        // Best-effort: on a read error keep whatever arrived before it.
        let _ = pipe.read_to_end(&mut raw);
        String::from_utf8_lossy(&raw).into_owned()
    });

    let deadline = Instant::now() + timeout;
    loop {
        let give_up = match child.try_wait() {
            // Child exited: the pipe is closed, so the drain thread finishes.
            Ok(Some(_status)) => return drain.join().ok(),
            Ok(None) => Instant::now() >= deadline,
            Err(_) => true,
        };
        if give_up {
            // Order matters: kill closes the pipe, wait reaps the zombie,
            // and only then is joining the drain thread guaranteed to return.
            let _ = child.kill();
            let _ = child.wait();
            let _ = drain.join();
            return None;
        }
        std::thread::sleep(POLL_INTERVAL);
    }
}
fn detect_search_tool() -> SearchTool {
@@ -67,22 +175,16 @@ impl FileSearchProvider {
}
/// Extract the search term from the query.
///
/// The trigger markers (`:file`, `/`) are stripped upstream by
/// [`crate::filter::ProviderFilter::parse_query`], so the term arrives
/// clean — this just trims and rejects an empty term.
fn extract_search_term(query: &str) -> Option<&str> {
let trimmed = query.trim();
if let Some(rest) = trimmed.strip_prefix("/ ") {
Some(rest.trim())
} else if let Some(rest) = trimmed.strip_prefix('/') {
Some(rest.trim())
if trimmed.is_empty() {
None
} else {
// Handle "file " and "find " prefixes (case-insensitive), or raw
// query in filter mode.
let lower = trimmed.to_lowercase();
if lower.starts_with("file ") || lower.starts_with("find ") {
Some(trimmed[5..].trim())
} else {
Some(trimmed)
}
Some(trimmed)
}
}
@@ -104,41 +206,35 @@ impl FileSearchProvider {
}
fn search_with_fd(&self, pattern: &str) -> Vec<LaunchItem> {
let output = match Command::new("fd")
.args([
"--max-results",
&MAX_RESULTS.to_string(),
"--type",
"f", // Files only
"--type",
"d", // And directories
pattern,
])
.current_dir(&self.home)
.output()
{
Ok(o) => o,
Err(_) => return Vec::new(),
};
let mut cmd = Command::new("fd");
cmd.args(Self::fd_args(pattern, &self.roots));
// With no explicit roots, search from $HOME; otherwise the roots are
// passed as positional paths and the cwd is irrelevant.
if self.roots.is_empty() {
cmd.current_dir(&self.home);
}
self.parse_file_results(&String::from_utf8_lossy(&output.stdout))
match Self::run_bounded(cmd, QUERY_TIMEOUT) {
Some(out) => self.parse_file_results(&out),
None => Vec::new(),
}
}
fn search_with_locate(&self, pattern: &str) -> Vec<LaunchItem> {
let output = match Command::new("locate")
.args([
"--limit",
&MAX_RESULTS.to_string(),
"--ignore-case",
pattern,
])
.output()
{
Ok(o) => o,
Err(_) => return Vec::new(),
};
// `locate` queries a prebuilt index, so roots/depth don't apply; it
// still gets the wall-clock cap.
let mut cmd = Command::new("locate");
cmd.args([
"--limit",
&MAX_RESULTS.to_string(),
"--ignore-case",
pattern,
]);
self.parse_file_results(&String::from_utf8_lossy(&output.stdout))
match Self::run_bounded(cmd, QUERY_TIMEOUT) {
Some(out) => self.parse_file_results(&out),
None => Vec::new(),
}
}
fn parse_file_results(&self, output: &str) -> Vec<LaunchItem> {
@@ -192,6 +288,14 @@ impl DynamicProvider for FileSearchProvider {
8_000
}
/// File search is opt-in: it only runs when `:file` (or the `/` alias) is
/// active, never on a bare auto-mode keystroke. Each query shells out to
/// `fd`/`locate`, so running it per-keystroke in auto mode stalls the
/// whole result set.
fn runs_in_auto_mode(&self) -> bool {
false
}
fn prefix(&self) -> Option<&str> {
Some(":file")
}
@@ -219,28 +323,28 @@ mod tests {
#[test]
fn test_extract_search_term() {
// Markers are stripped upstream; the term arrives clean and is used
// verbatim (just trimmed). No more `/`, `file `, `find ` magic — that
// caused `:file find me` to search for "me".
assert_eq!(
FileSearchProvider::extract_search_term("/ config.toml"),
FileSearchProvider::extract_search_term("config.toml"),
Some("config.toml")
);
assert_eq!(
FileSearchProvider::extract_search_term("/config"),
Some("config")
);
assert_eq!(
FileSearchProvider::extract_search_term("file bashrc"),
FileSearchProvider::extract_search_term(" bashrc "),
Some("bashrc")
);
assert_eq!(
FileSearchProvider::extract_search_term("find readme"),
Some("readme")
FileSearchProvider::extract_search_term("find me"),
Some("find me"),
"the leading word must NOT be stripped anymore"
);
}
#[test]
fn test_extract_search_term_empty() {
assert_eq!(FileSearchProvider::extract_search_term("/"), Some(""));
assert_eq!(FileSearchProvider::extract_search_term("/ "), Some(""));
assert_eq!(FileSearchProvider::extract_search_term(""), None);
assert_eq!(FileSearchProvider::extract_search_term(" "), None);
}
#[test]
@@ -268,11 +372,77 @@ mod tests {
#[test]
fn test_evaluate_empty() {
let provider = FileSearchProvider::new();
let results = provider.evaluate("/");
assert!(results.is_empty());
assert!(provider.evaluate("").is_empty());
assert!(provider.evaluate(" ").is_empty());
}
let results = provider.evaluate("/ ");
assert!(results.is_empty());
#[test]
fn filesearch_does_not_run_in_auto_mode() {
    // The opt-in flag is a property of the provider type, not of its roots.
    let runs = FileSearchProvider::new().runs_in_auto_mode();
    assert!(
        !runs,
        "file search must be opt-in (prefix only), never auto-mode"
    );
}
#[test]
fn fd_args_are_bounded() {
    let args = FileSearchProvider::fd_args("needle", &[]);

    // The depth flag must be present and followed by the configured cap.
    let depth_value = args
        .iter()
        .position(|a| a == "--max-depth")
        .map(|i| &args[i + 1])
        .expect("depth");
    assert_eq!(*depth_value, MAX_DEPTH.to_string());

    // The result cap must be present.
    assert!(args.iter().any(|a| a == "--max-results"));

    // Each noise directory needs its own `--exclude <dir>` pair.
    for ex in EXCLUDES {
        let excluded = args.windows(2).any(|w| w[0] == "--exclude" && w[1] == *ex);
        assert!(excluded, "missing --exclude {ex}");
    }

    // With no roots, the pattern is the final argument.
    assert!(args.iter().any(|a| a == "needle"));
    assert_eq!(args.last().map(String::as_str), Some("needle"));
}
#[test]
fn fd_args_append_roots_after_pattern() {
    let roots = ["/a".to_string(), "/b".to_string()];
    let args = FileSearchProvider::fd_args("needle", &roots);

    // Everything that follows the pattern must be exactly the roots, in order.
    let tail: Vec<&str> = args
        .iter()
        .skip_while(|a| a.as_str() != "needle")
        .skip(1)
        .map(String::as_str)
        .collect();
    assert_eq!(tail, ["/a", "/b"]);
}
#[test]
fn expand_tilde_resolves_home() {
    let home = "/home/u";

    // `~/x` is rooted at home, and a bare `~` is home itself.
    let nested = FileSearchProvider::expand_tilde("~/Documents", home);
    assert_eq!(nested, "/home/u/Documents");
    let bare = FileSearchProvider::expand_tilde("~", home);
    assert_eq!(bare, "/home/u");

    // Paths without a tilde are returned as given.
    let absolute = FileSearchProvider::expand_tilde("/etc", home);
    assert_eq!(
        absolute, "/etc",
        "absolute paths pass through untouched"
    );
}
#[test]
fn resolve_roots_drops_nonexistent_and_keeps_dirs() {
    // The temp dir stands in for home: it is a directory that exists.
    let home = std::env::temp_dir().to_string_lossy().into_owned();
    let configured = vec![
        "~".to_string(),
        "/definitely/not/a/real/dir/xyz".to_string(),
    ];

    let resolved = FileSearchProvider::resolve_roots(configured, &home);

    assert_eq!(
        resolved,
        vec![home],
        "only existing directories survive; ~ expands to home"
    );
}
#[test]
+92 -1
View File
@@ -217,6 +217,17 @@ pub trait DynamicProvider: Send + Sync {
fn query(&self, query: &str) -> Vec<LaunchItem>;
fn priority(&self) -> u32;
/// Whether this provider contributes to bare auto-mode queries (no prefix).
///
/// Cheap in-process providers (calculator, converter, web search) return
/// `true` and run on every keystroke. Providers that shell out and do I/O
/// per query (file search) return `false`, so they only run when their
/// prefix is explicitly active — otherwise every keystroke spawns a
/// subprocess and stalls the whole result set.
fn runs_in_auto_mode(&self) -> bool {
true
}
/// Optional search prefix (e.g. ":calc"). None = no prefix.
fn prefix(&self) -> Option<&str> {
None
@@ -337,7 +348,9 @@ impl ProviderManager {
}
#[cfg(feature = "filesearch")]
if cfg_snapshot.filesearch {
builtin_dynamic.push(Box::new(filesearch::FileSearchProvider::new()));
builtin_dynamic.push(Box::new(filesearch::FileSearchProvider::with_roots(
cfg_snapshot.filesearch_roots.clone(),
)));
info!("Registered filesearch provider");
}
#[cfg(feature = "websearch")]
@@ -514,6 +527,14 @@ impl ProviderManager {
if !filter.is_active(provider.provider_type()) {
continue;
}
// Expensive providers (file search) only run when their prefix
// is explicitly selected — never on a bare auto-mode keystroke.
let prefix_active = filter
.active_prefix()
.is_some_and(|p| p == provider.provider_type());
if !prefix_active && !provider.runs_in_auto_mode() {
continue;
}
let dynamic_results = provider.query(query);
let base_score = provider.priority() as i64;
let grouping_bonus: i64 = match provider.provider_type() {
@@ -929,6 +950,76 @@ mod tests {
assert_eq!(results[0].0.name, "Firefox");
}
/// Dynamic provider that counts how often `query()` is invoked, used to
/// verify auto-mode gating without shelling out to a real backend.
struct SpyDynamic {
/// Shared invocation counter; the test keeps a clone and reads it after
/// the search has run.
calls: std::sync::Arc<std::sync::atomic::AtomicUsize>,
/// Value reported by `runs_in_auto_mode()`.
auto: bool,
/// Plugin type id reported via `provider_type()`.
type_id: String,
}
// Test double: every method returns a fixed or field-backed value; only
// `query` has a side effect (bumping the shared counter).
impl DynamicProvider for SpyDynamic {
fn name(&self) -> &str {
"Spy"
}
// Reports itself as a plugin provider keyed by the configured type id.
fn provider_type(&self) -> ProviderType {
ProviderType::Plugin(self.type_id.clone())
}
// Records the call and contributes no results.
fn query(&self, _query: &str) -> Vec<LaunchItem> {
self.calls.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
Vec::new()
}
fn priority(&self) -> u32 {
100
}
// Overrides the trait method with the per-instance flag under test.
fn runs_in_auto_mode(&self) -> bool {
self.auto
}
}
/// Run one search for "firefox" through a manager whose only dynamic provider
/// is a [`SpyDynamic`], and return how many times its `query()` was called.
///
/// `auto` is the spy's `runs_in_auto_mode()` answer; `prefix` is the prefix
/// set on the filter (`None` = bare auto-mode query).
fn run_spy_search(auto: bool, prefix: Option<ProviderType>) -> usize {
    use crate::filter::ProviderFilter;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::sync::Arc;

    let counter = Arc::new(AtomicUsize::new(0));
    let spy = SpyDynamic {
        calls: Arc::clone(&counter),
        auto,
        type_id: "filesearch".to_string(),
    };

    let manager = ProviderManager::new(Vec::new(), vec![Box::new(spy)]);
    let store = FrecencyStore::new();
    let mut active = ProviderFilter::all();
    active.set_prefix(prefix);

    // Results are irrelevant here; only the call count matters.
    let _ = manager.search_with_frecency("firefox", 10, &active, &store, 0.0, None);
    counter.load(Ordering::SeqCst)
}
#[test]
fn expensive_dynamic_provider_skipped_in_auto_mode() {
    // Opt-in provider, no prefix selected: it must never be queried.
    let calls = run_spy_search(false, None);
    assert_eq!(
        calls, 0,
        "a runs_in_auto_mode()=false provider must not run on a bare auto-mode query"
    );
}
#[test]
fn expensive_dynamic_provider_runs_when_prefix_active() {
    // Opt-in provider with its own prefix selected: queried exactly once.
    let own_prefix = Some(ProviderType::Plugin("filesearch".into()));
    let calls = run_spy_search(false, own_prefix);
    assert_eq!(
        calls, 1,
        "the provider must run when its own prefix is active"
    );
}
#[test]
fn cheap_dynamic_provider_runs_in_auto_mode() {
    // Auto-mode provider, no prefix selected: queried exactly once.
    let calls = run_spy_search(true, None);
    assert_eq!(
        calls, 1,
        "a runs_in_auto_mode()=true provider must run on a bare auto-mode query"
    );
}
// =========================================================================
// Tests for behavior introduced in the v2 C-ABI demolition (commit ae4a903)
// =========================================================================
+5
View File
@@ -164,3 +164,8 @@ frecency_weight = 0.3 # 0.0 = disabled, 1.0 = strong boost
# Options: google, duckduckgo, bing, startpage, searxng, brave, ecosia
# Or a custom URL: "https://search.example.com/?q={query}"
search_engine = "duckduckgo"
# Root directories for file search (the :file prefix or the / alias). A leading
# "~/" is expanded; entries that are not existing directories are dropped.
# Empty (default) searches $HOME. File search is opt-in — it never runs on a
# bare query, only under the :file prefix or the / alias.
# filesearch_roots = ["~/Documents", "~/code"]
+5
View File
@@ -33,6 +33,11 @@ owlry.set {
search_engine = "duckduckgo", -- google | duckduckgo | bing |
-- startpage | searxng | brave |
-- ecosia, or a custom "{query}" URL.
-- Root dirs for file search (the :file prefix or the / alias). A leading "~/"
-- is expanded; entries that are not existing directories are dropped. Empty
-- (the default) searches $HOME. File search is opt-in only — it never runs on
-- a bare query.
-- filesearch_roots = { "~/Documents", "~/code" },
}
-- ──────────────────────────────────────────────────────────────────────
+3
View File
@@ -132,6 +132,9 @@ Sets top-level config values. Takes a table of key-value pairs. Calling `owlry.s
| `frecency` | `boolean` | `true` | Boost frequently/recently used items |
| `frecency_weight` | `number` | `0.3` | Frecency boost weight (0.0 = off, 1.0 = strong) |
| `search_engine` | `string` | `"duckduckgo"` | Engine for `:web` / `?` queries (see §6) |
| `filesearch_roots` | `string[]` | `{}` | Root dirs for `:file` / `/` search (a leading `~/` is expanded). Empty searches `$HOME`. Entries that are not existing directories are dropped |
> File search is **opt-in**: it only runs under the `:file` prefix (or the `/` alias), never on a bare query, because each keystroke shells out to `fd`/`locate`. Walks are bounded (depth, excludes, 20 results, 750 ms).
**Example:**