Files
gnoma/internal/engine/image_input_test.go
T
vikingowl bc137182d4 feat(engine): parse [Image: /path] markers, gate on Vision capability
buildUserMessage replaces the unconditional NewUserText wrap inside
SubmitWithOptions. When the active model advertises Vision and the
input contains [Image: /path] markers, the markers are inlined as
ImageContent blocks carrying the file bytes; otherwise the input is
passed through as a single text block (legacy behavior preserved
for subprocess CLIs that auto-ingest paths, e.g. gemini-cli).

image_input.go:
- imageMarkerRe extracts each [Image: ...] occurrence.
- Per marker: validates absolute path, file (not dir), size cap of
  10 MiB, image/* media type via http.DetectContentType.
- On any validation failure, the marker is left as literal text and
  a warning is recorded — the turn still proceeds.

Routing: latestUserHasImages drives task.RequiresVision in both the
primary stream attempt and the retryOnTransient path, so failover
arms also respect the vision requirement.

Tests cover: no markers (single text block), single image
(bytes captured into Image.Data, MediaType set), missing file
(literal fallback + warning), relative path rejection, oversized
rejection, non-image file rejection, multiple images interleaved
with text.
2026-05-22 11:50:45 +02:00

156 lines
4.8 KiB
Go

package engine
import (
"bytes"
"os"
"path/filepath"
"strings"
"testing"
"somegit.dev/Owlibou/gnoma/internal/message"
)
// pngOnePixel is the byte image of a tiny 1x1 PNG file, laid out below chunk
// by chunk. Tests write it to disk so that http.DetectContentType reports
// "image/png" for the file and the marker parser accepts it as an image.
var pngOnePixel = []byte{
	// File signature: "\x89PNG\r\n\x1a\n".
	0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A,

	// IHDR chunk: 13-byte payload (width 1, height 1, bit depth 8,
	// colour type 2, then compression/filter/interlace all zero).
	0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52,
	0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
	0x08, 0x02, 0x00, 0x00, 0x00,
	0x90, 0x77, 0x53, 0xDE, // IHDR CRC

	// IDAT chunk: 12-byte compressed pixel payload.
	0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41, 0x54,
	0x08, 0x99, 0x63, 0xF8, 0xCF, 0xC0, 0x00, 0x00,
	0x00, 0x03, 0x00, 0x01,
	0x5B, 0x3E, 0xBA, 0xD6, // IDAT CRC

	// IEND chunk: empty payload.
	0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E, 0x44,
	0xAE, 0x42, 0x60, 0x82, // IEND CRC
}
// writeTempPNG creates "test.png" containing pngOnePixel inside a fresh
// per-test temporary directory and returns the file's path. Any write error
// aborts the calling test.
func writeTempPNG(t *testing.T) string {
	t.Helper()

	dir := t.TempDir()
	target := filepath.Join(dir, "test.png")
	err := os.WriteFile(target, pngOnePixel, 0o600)
	if err != nil {
		t.Fatal(err)
	}
	return target
}
// TestParseImageMarkers_NoMarkers checks that input without any image marker
// comes back as exactly one text block carrying the input verbatim, and that
// no warnings are reported.
func TestParseImageMarkers_NoMarkers(t *testing.T) {
	blocks, warnings := parseImageMarkers("just plain text")

	singleText := len(blocks) == 1 &&
		blocks[0].Type == message.ContentText &&
		blocks[0].Text == "just plain text"
	if !singleText {
		t.Errorf("got %+v, want single text block", blocks)
	}

	if len(warnings) > 0 {
		t.Errorf("unexpected warnings: %v", warnings)
	}
}
// TestParseImageMarkers_SingleImage checks that a marker pointing at a real
// PNG file is replaced by an image block holding the file's bytes and the
// "image/png" media type, followed by a text block with the remaining input.
func TestParseImageMarkers_SingleImage(t *testing.T) {
	path := writeTempPNG(t)
	got, warns := parseImageMarkers("[Image: " + path + "] what is this?")
	if len(warns) != 0 {
		t.Fatalf("unexpected warnings: %v", warns)
	}
	if len(got) != 2 {
		t.Fatalf("got %d blocks, want 2", len(got))
	}
	if got[0].Type != message.ContentImage {
		t.Errorf("block 0 type = %v, want ContentImage", got[0].Type)
	}
	// A nil Image must stop the test here: the checks below dereference it,
	// and a non-fatal failure would turn into a nil-pointer panic.
	if got[0].Image == nil {
		t.Fatal("block 0 has nil Image; image bytes not captured into Content.Image.Data")
	}
	if !bytes.Equal(got[0].Image.Data, pngOnePixel) {
		t.Error("image bytes not captured into Content.Image.Data")
	}
	if got[0].Image.MediaType != "image/png" {
		t.Errorf("MediaType = %q, want image/png", got[0].Image.MediaType)
	}
	if got[1].Type != message.ContentText || got[1].Text != " what is this?" {
		t.Errorf("block 1 = %+v, want trailing text", got[1])
	}
}
// TestParseImageMarkers_MissingFileWarnsAndFallsBackToText checks that a
// marker naming a file that does not exist yields exactly one warning
// mentioning the path, produces no image block, and leaves the marker in the
// text output unchanged.
func TestParseImageMarkers_MissingFileWarnsAndFallsBackToText(t *testing.T) {
	blocks, warnings := parseImageMarkers("see [Image: /nonexistent/path.png] please")
	if n := len(warnings); n != 1 {
		t.Fatalf("got %d warnings, want 1", n)
	}
	if w := warnings[0]; !strings.Contains(w, "/nonexistent/path.png") {
		t.Errorf("warning %q should mention path", w)
	}

	// The marker must survive as literal text so that subprocess CLIs which
	// auto-ingest paths keep working.
	var text strings.Builder
	for _, b := range blocks {
		switch b.Type {
		case message.ContentText:
			text.WriteString(b.Text)
		case message.ContentImage:
			t.Error("missing file should not produce image content")
		}
	}
	if joined := text.String(); !strings.Contains(joined, "[Image: /nonexistent/path.png]") {
		t.Errorf("joined text %q should keep literal marker", joined)
	}
}
// TestParseImageMarkers_RelativePathRejected checks that a marker with a
// relative path yields exactly one warning and that the warning mentions the
// absolute-path requirement.
func TestParseImageMarkers_RelativePathRejected(t *testing.T) {
	_, warnings := parseImageMarkers("[Image: relative/path.png]")

	if count := len(warnings); count != 1 {
		t.Fatalf("got %d warnings, want 1", count)
	}

	first := warnings[0]
	if !strings.Contains(first, "absolute") {
		t.Errorf("warning %q should explain absolute-path requirement", first)
	}
}
// TestParseImageMarkers_OversizedRejected checks that a file one byte larger
// than imageMaxBytes yields exactly one warning and that the warning says the
// limit was exceeded.
func TestParseImageMarkers_OversizedRejected(t *testing.T) {
	// One byte over the cap. The buffer starts with the PNG bytes so the
	// file still sniffs as an image and only the size check can reject it.
	payload := make([]byte, imageMaxBytes+1)
	copy(payload, pngOnePixel)

	file := filepath.Join(t.TempDir(), "big.png")
	err := os.WriteFile(file, payload, 0o600)
	if err != nil {
		t.Fatal(err)
	}

	_, warnings := parseImageMarkers("[Image: " + file + "]")
	if count := len(warnings); count != 1 {
		t.Fatalf("got %d warnings, want 1", count)
	}
	if first := warnings[0]; !strings.Contains(first, "exceeds") {
		t.Errorf("warning %q should explain size limit", first)
	}
}
// TestParseImageMarkers_NonImageFileRejected checks that a marker pointing at
// a plain-text file yields exactly one warning and that the warning mentions
// the unsupported media type.
func TestParseImageMarkers_NonImageFileRejected(t *testing.T) {
	file := filepath.Join(t.TempDir(), "not_an_image.txt")
	content := []byte("plain text, not an image")
	err := os.WriteFile(file, content, 0o600)
	if err != nil {
		t.Fatal(err)
	}

	_, warnings := parseImageMarkers("[Image: " + file + "]")
	if count := len(warnings); count != 1 {
		t.Fatalf("got %d warnings, want 1", count)
	}
	if first := warnings[0]; !strings.Contains(first, "unsupported media type") {
		t.Errorf("warning %q should mention media type", first)
	}
}
// TestParseImageMarkers_MultipleImagesAndText checks that two valid markers
// interleaved with text come back, without warnings, as five blocks in input
// order: text, image, text, image, text.
func TestParseImageMarkers_MultipleImagesAndText(t *testing.T) {
	first := writeTempPNG(t)
	second := writeTempPNG(t)

	blocks, warnings := parseImageMarkers(
		"before [Image: " + first + "] between [Image: " + second + "] after",
	)
	if len(warnings) > 0 {
		t.Fatalf("unexpected warnings: %v", warnings)
	}

	// Block types in the order they should appear.
	expected := []message.ContentType{
		message.ContentText,
		message.ContentImage,
		message.ContentText,
		message.ContentImage,
		message.ContentText,
	}
	if len(blocks) != len(expected) {
		t.Fatalf("got %d blocks, want %d", len(blocks), len(expected))
	}
	for idx := range expected {
		if actual := blocks[idx].Type; actual != expected[idx] {
			t.Errorf("block %d type = %v, want %v", idx, actual, expected[idx])
		}
	}
}