package slm import ( "context" "errors" "fmt" "log/slog" "net" "net/http" "os" "os/exec" "path/filepath" "strconv" "strings" "time" ) const pidFile = "llamafile.pid" // DefaultModelURL is the default llamafile to download when none is configured. // Qwen2.5 0.5B Instruct Q6_K (~450 MB) — small, fast, and supports tools. const DefaultModelURL = "https://huggingface.co/Mozilla/Qwen2.5-0.5B-Instruct-llamafile/resolve/main/Qwen2.5-0.5B-Instruct-Q6_K.llamafile" const DefaultModelSHA256 = "c4e991af9ea7077339b8768e349da486a76392e72b3ef47ad372e6582779a8dd" // DefaultDataDir returns the platform default SLM data directory. // Follows XDG Base Directory Specification: $XDG_DATA_HOME/gnoma/slm, // falling back to ~/.local/share/gnoma/slm. func DefaultDataDir() string { dir := os.Getenv("XDG_DATA_HOME") if dir == "" { home, _ := os.UserHomeDir() dir = filepath.Join(home, ".local", "share") } return filepath.Join(dir, "gnoma", "slm") } // Status describes the setup state of the SLM. type Status int const ( StatusNotSetUp Status = iota // no manifest on disk StatusReady // manifest + binary file both exist StatusMissing // manifest exists but binary file is gone ) func (s Status) String() string { switch s { case StatusNotSetUp: return "not set up" case StatusReady: return "ready" case StatusMissing: return "file missing" default: return "unknown" } } // Config holds Manager configuration. type Config struct { DataDir string // XDG data home / gnoma / slm; must be set ModelURL string // required for Setup ExpectedSHA256 string // if non-empty, Setup verifies against this } // Manager controls the llamafile lifecycle. type Manager struct { cfg Config process *os.Process port int logger *slog.Logger startupBegin time.Time startupDuration time.Duration // 0 until Start() returns healthy } // StartupDuration returns the elapsed time from Start() invocation to the // first successful health check. Returns 0 when llamafile is not (yet) ready. 
func (m *Manager) StartupDuration() time.Duration { return m.startupDuration }

// New creates a Manager. DataDir must be non-empty.
// A nil logger is replaced with slog.Default().
func New(cfg Config, logger *slog.Logger) *Manager {
	if logger == nil {
		logger = slog.Default()
	}
	return &Manager{cfg: cfg, logger: logger}
}

// IsSetUp returns true when Status() == StatusReady.
func (m *Manager) IsSetUp() bool { return m.Status() == StatusReady }

// Status returns the current setup state by inspecting the manifest and filesystem.
// Any failure to read the manifest is reported as StatusNotSetUp; any failure
// to stat the file the manifest points at is reported as StatusMissing.
func (m *Manager) Status() Status {
	mf, err := readManifest(m.cfg.DataDir)
	if err != nil {
		return StatusNotSetUp
	}
	if _, err := os.Stat(mf.FilePath); err != nil {
		return StatusMissing
	}
	return StatusReady
}

// Setup downloads the llamafile from ModelURL, verifies the hash, and writes the manifest.
// progress receives (downloaded, total) byte counts; may be nil.
//
// Setup is a no-op when Status() is already StatusReady. It returns an error
// when ModelURL is empty, the data directory cannot be created, the download
// fails, the hash does not match ExpectedSHA256, or the manifest cannot be
// written.
func (m *Manager) Setup(ctx context.Context, progress func(downloaded, total int64)) error {
	if m.cfg.ModelURL == "" {
		return fmt.Errorf("slm: ModelURL is required")
	}
	if m.Status() == StatusReady {
		return nil
	}
	if err := os.MkdirAll(m.cfg.DataDir, 0700); err != nil {
		return fmt.Errorf("slm: create data dir: %w", err)
	}

	// Derive the local file name from the URL path only: a query string or
	// fragment must not leak into the on-disk name (and may itself contain
	// slashes that would confuse filepath.Base).
	urlPath := m.cfg.ModelURL
	if i := strings.IndexAny(urlPath, "?#"); i >= 0 {
		urlPath = urlPath[:i]
	}
	name := filepath.Base(urlPath)
	if name == "" || name == "." || name == string(filepath.Separator) {
		name = "llamafile"
	}
	dst := filepath.Join(m.cfg.DataDir, name)

	m.logger.Info("downloading llamafile", "url", m.cfg.ModelURL, "dst", dst)
	sha256hex, size, err := download(ctx, m.cfg.ModelURL, dst, progress)
	if err != nil {
		return err
	}

	if m.cfg.ExpectedSHA256 != "" {
		// Hex digests are case-insensitive and are often pasted with stray
		// whitespace; neither should reject a valid download. A value that
		// is only whitespace still fails the comparison (fails closed).
		want := strings.TrimSpace(m.cfg.ExpectedSHA256)
		if !strings.EqualFold(sha256hex, want) {
			_ = os.Remove(dst) // cleanup corrupt/malicious download
			return fmt.Errorf("slm: hash mismatch for %s: got %s, want %s",
				m.cfg.ModelURL, sha256hex, m.cfg.ExpectedSHA256)
		}
	}

	mf := &Manifest{
		ModelURL: m.cfg.ModelURL,
		FilePath: dst,
		SHA256:   sha256hex,
		Size:     size,
		SetupAt:  time.Now().UTC(),
	}
	return writeManifest(m.cfg.DataDir, mf)
}

// Start launches the llamafile subprocess and returns its base URL.
// Reaps a stale PID file from a previous run if present. func (m *Manager) Start(ctx context.Context) (string, error) { m.startupBegin = time.Now() mf, err := readManifest(m.cfg.DataDir) if err != nil { return "", fmt.Errorf("slm: not set up: %w", err) } if _, err := os.Stat(mf.FilePath); err != nil { return "", fmt.Errorf("slm: llamafile missing at %s", mf.FilePath) } m.reapStalePID() port, err := freePort() if err != nil { return "", fmt.Errorf("slm: find free port: %w", err) } // Invoke via sh to bypass Wine binfmt_misc interception of APE polyglot binaries. // llamafile is a valid POSIX shell script; sh executes the embedded launcher header. cmd := exec.CommandContext(ctx, "sh", mf.FilePath, "--server", "--host", "127.0.0.1", "--port", strconv.Itoa(port), "--nobrowser", ) if err := cmd.Start(); err != nil { return "", fmt.Errorf("slm: start llamafile: %w", err) } m.process = cmd.Process m.port = port if err := os.WriteFile(m.pidPath(), []byte(strconv.Itoa(cmd.Process.Pid)), 0600); err != nil { m.logger.Warn("failed to write pid file", "error", err) } baseURL := fmt.Sprintf("http://127.0.0.1:%d", port) m.logger.Info("llamafile started", "pid", cmd.Process.Pid, "url", baseURL) if err := waitHealthy(ctx, baseURL); err != nil { _ = m.Stop() return "", err } m.startupDuration = time.Since(m.startupBegin) m.logger.Info("llamafile healthy", "url", baseURL, "startup", m.startupDuration) return baseURL, nil } // Stop terminates the llamafile process and cleans up the PID file. func (m *Manager) Stop() error { if m.process == nil { return nil } if err := m.process.Kill(); err != nil && !errors.Is(err, os.ErrProcessDone) { return fmt.Errorf("slm: kill llamafile: %w", err) } m.process = nil m.port = 0 _ = os.Remove(m.pidPath()) return nil } // BaseURL returns the current server base URL, or "" if not running. 
func (m *Manager) BaseURL() string { if m.process == nil || m.port == 0 { return "" } return fmt.Sprintf("http://127.0.0.1:%d", m.port) } // Manifest returns the on-disk manifest if present, or nil. func (m *Manager) Manifest() *Manifest { mf, err := readManifest(m.cfg.DataDir) if err != nil { return nil } return mf } func (m *Manager) pidPath() string { return filepath.Join(m.cfg.DataDir, pidFile) } func (m *Manager) reapStalePID() { data, err := os.ReadFile(m.pidPath()) if err != nil { return } pid, err := strconv.Atoi(strings.TrimSpace(string(data))) if err != nil { _ = os.Remove(m.pidPath()) return } proc, err := os.FindProcess(pid) if err != nil { _ = os.Remove(m.pidPath()) return } _ = proc.Kill() _ = os.Remove(m.pidPath()) m.logger.Debug("reaped stale llamafile process", "pid", pid) } // freePort binds on :0 to let the OS pick an available port, then releases it. // There is a small TOCTOU window between release and use, which is acceptable for a local dev tool. func freePort() (int, error) { l, err := net.Listen("tcp", "127.0.0.1:0") if err != nil { return 0, err } port := l.Addr().(*net.TCPAddr).Port _ = l.Close() return port, nil } // waitHealthy polls baseURL/health until it returns 200 or ctx is cancelled. // The ctx deadline governs how long we'll wait — callers should pass a // context with a budget appropriate for first-launch cold start. func waitHealthy(ctx context.Context, baseURL string) error { client := &http.Client{Timeout: 2 * time.Second} for { select { case <-ctx.Done(): return fmt.Errorf("slm: health check did not pass before context deadline: %w", ctx.Err()) default: } resp, err := client.Get(baseURL + "/health") if err == nil { _ = resp.Body.Close() if resp.StatusCode == http.StatusOK { return nil } } time.Sleep(200 * time.Millisecond) } }