simplify vision support code
This commit is contained in:
parent
7f2c335a76
commit
ac390253c3
@ -5,8 +5,6 @@ import mimetypes
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from nanobot.agent.memory import MemoryStore
|
from nanobot.agent.memory import MemoryStore
|
||||||
from nanobot.agent.skills import SkillsLoader
|
from nanobot.agent.skills import SkillsLoader
|
||||||
|
|
||||||
@ -148,50 +146,23 @@ When remembering something, write to {workspace_path}/memory/MEMORY.md"""
|
|||||||
|
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
def _build_user_content(
|
def _build_user_content(self, text: str, media: list[str] | None) -> str | list[dict[str, Any]]:
|
||||||
self, text: str, media: list[str] | None
|
"""Build user message content with optional base64-encoded images."""
|
||||||
) -> str | list[dict[str, Any]]:
|
|
||||||
"""
|
|
||||||
Build user message content, optionally with images.
|
|
||||||
|
|
||||||
Returns a plain string if no media, or a multimodal content list
|
|
||||||
with base64-encoded images.
|
|
||||||
"""
|
|
||||||
if not media:
|
if not media:
|
||||||
return text
|
return text
|
||||||
|
|
||||||
content: list[dict[str, Any]] = []
|
images = []
|
||||||
|
|
||||||
for path in media:
|
for path in media:
|
||||||
encoded = self._encode_image(path)
|
p = Path(path)
|
||||||
if encoded:
|
mime, _ = mimetypes.guess_type(path)
|
||||||
content.append(encoded)
|
if not p.is_file() or not mime or not mime.startswith("image/"):
|
||||||
|
continue
|
||||||
|
b64 = base64.b64encode(p.read_bytes()).decode()
|
||||||
|
images.append({"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}})
|
||||||
|
|
||||||
if not content:
|
if not images:
|
||||||
return text
|
return text
|
||||||
|
return images + [{"type": "text", "text": text}]
|
||||||
content.append({"type": "text", "text": text})
|
|
||||||
return content
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _encode_image(file_path: str) -> dict[str, Any] | None:
|
|
||||||
"""Encode a local image file to a base64 data URL for the LLM."""
|
|
||||||
path = Path(file_path)
|
|
||||||
if not path.is_file():
|
|
||||||
logger.warning(f"Media file not found: {file_path}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
mime, _ = mimetypes.guess_type(file_path)
|
|
||||||
if not mime or not mime.startswith("image/"):
|
|
||||||
logger.warning(f"Unsupported media type for {file_path}: {mime}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
data = path.read_bytes()
|
|
||||||
b64 = base64.b64encode(data).decode("utf-8")
|
|
||||||
return {
|
|
||||||
"type": "image_url",
|
|
||||||
"image_url": {"url": f"data:{mime};base64,{b64}"},
|
|
||||||
}
|
|
||||||
|
|
||||||
def add_tool_result(
|
def add_tool_result(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user