Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/LlmContentEditor/Domain/Agent/ContentEditorAgent.php
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,18 @@ protected function tools(): array
)
)->setCallable(fn (string $url): string => $this->sitebuilderFacade->getRemoteAssetInfo($url)),

Tool::make(
'fetch_remote_web_page',
'Fetch textual content from a remote web page via cURL. Use this when the user asks to inspect, summarize, adapt, or copy content from an external URL. Returns JSON with response metadata and page content; on failure returns JSON with an "error" key.'
)->addProperty(
new ToolProperty(
'url',
PropertyType::STRING,
'The absolute URL to fetch (http or https).',
true
)
)->setCallable(fn (string $url): string => $this->sitebuilderFacade->fetchRemoteWebPage($url)),

Tool::make(
'get_workspace_rules',
'Get project-specific rules from .sitebuilder/rules/ folders. Returns a JSON object where keys are rule names (filename without .md extension) and values are the rule contents (Markdown text). IMPORTANT: You must call this tool at least once at the start of every session to understand project-specific conventions and requirements.'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ private function messageForToolCalling(AgentEventDto $event, string $locale): ?s
'list_remote_content_asset_urls' => 'fetching_remote_asset_urls',
'search_remote_content_asset_urls' => 'searching_remote_assets',
'get_remote_asset_info' => 'getting_remote_asset_info',
'fetch_remote_web_page' => 'fetching_remote_web_page',
'suggest_commit_message' => 'suggesting_commit_message',
'get_preview_url' => $label !== null ? 'getting_preview_url' : 'getting_preview_url_only',
default => $label !== null ? 'running_tool_on' : null,
Expand Down
5 changes: 5 additions & 0 deletions src/ProjectMgmt/Domain/ValueObject/AgentConfigTemplate.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ private static function defaultTemplate(): self
- Call list_remote_content_asset_urls to get a JSON array of all remote asset URLs configured for this project. Use these URLs directly (e.g. in img src). If the tool returns an empty array, no remote manifests are configured.
- Call get_remote_asset_info with a URL to retrieve metadata (width, height, mimeType, sizeInBytes) for a remote image without downloading it. Use this when you need dimensions or format for embedding.

REMOTE WEB PAGES:
- If the user asks you to inspect, summarize, adapt, or copy content from an external page, call fetch_remote_web_page with the page URL.
- fetch_remote_web_page returns JSON with fields like statusCode, contentType, finalUrl, content, and truncated.
- Only use this for http/https URLs. If the tool returns an error JSON object, explain the issue and ask for another URL.

WORKSPACE RULES:
- Projects may define custom rules in .sitebuilder/rules/ folders (Markdown files)
- You MUST call get_workspace_rules whenever you start working on a task
Expand Down
134 changes: 134 additions & 0 deletions src/WorkspaceTooling/Facade/WorkspaceToolingFacade.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@

final class WorkspaceToolingFacade extends BaseWorkspaceToolingFacade implements WorkspaceToolingServiceInterface
{
private const string WORKSPACE_MOUNT_POINT = '/workspace';
private const string CURL_META_MARKER = '__PB_CURL_META__';
private const int REMOTE_WEB_PAGE_MAX_BYTES = 50_000;
private const int REMOTE_WEB_PAGE_TIMEOUT_SECONDS = 20;
private const int REMOTE_WEB_PAGE_CONNECT_TIMEOUT_SECONDS = 10;

public function __construct(
FileOperationsServiceInterface $fileOperationsService,
TextOperationsService $textOperationsService,
Expand Down Expand Up @@ -228,6 +234,134 @@ public function getWorkspaceRules(): string
return json_encode($rules, JSON_THROW_ON_ERROR);
}

/**
 * Fetch a remote web page with cURL and return the result as a JSON string.
 *
 * Steps:
 *  1. Trim and validate the URL (absolute http/https only).
 *  2. Require that the execution context exposes an existing workspace directory.
 *  3. Run the curl command built by buildFetchRemoteWebPageCommand() through the
 *     shell operations service.
 *  4. Split the output at the last CURL_META_MARKER into page content and the
 *     tab-separated metadata (status code, content type, effective URL).
 *  5. Truncate the content to REMOTE_WEB_PAGE_MAX_BYTES bytes.
 *
 * Success JSON keys: url, finalUrl, statusCode, contentType, content, truncated.
 * Failure JSON keys: error, url.
 *
 * @param string $url URL supplied by the caller; surrounding whitespace is ignored
 *
 * @return string JSON document (success or error shape as described above)
 */
public function fetchRemoteWebPage(string $url): string
{
    $normalizedUrl = trim($url);
    if (!$this->isAllowedRemoteWebPageUrl($normalizedUrl)) {
        return $this->encodeRemoteWebPageError(
            'Invalid URL. Only absolute http/https URLs are supported.',
            $normalizedUrl
        );
    }

    $workspacePath = $this->executionContext->getWorkspacePath();
    if ($workspacePath === null || $workspacePath === '' || !is_dir($workspacePath)) {
        return $this->encodeRemoteWebPageError(
            'Execution context not set. Cannot resolve workspace path for cURL fetch.',
            $normalizedUrl
        );
    }

    try {
        // NOTE(review): the first argument is the fixed mount point, not $workspacePath;
        // presumably the runner maps the workspace onto it - confirm against runCommand().
        $output = $this->shellOperationsService->runCommand(
            self::WORKSPACE_MOUNT_POINT,
            $this->buildFetchRemoteWebPageCommand($normalizedUrl)
        );
    } catch (Throwable $throwable) {
        return $this->encodeRemoteWebPageError(
            'Failed to fetch remote page: ' . $throwable->getMessage(),
            $normalizedUrl
        );
    }

    // The metadata is appended by curl's --write-out, so the LAST marker is the real one
    // even if the page body happens to contain the marker text itself.
    $markerPos = strrpos($output, self::CURL_META_MARKER);
    if ($markerPos === false) {
        return $this->encodeRemoteWebPageError(
            'cURL output did not contain expected metadata.',
            $normalizedUrl
        );
    }

    // Drop the newline that the write-out format emits right before the marker.
    $contentEnd = $markerPos;
    if ($contentEnd > 0 && $output[$contentEnd - 1] === "\n") {
        --$contentEnd;
    }

    $content = substr($output, 0, $contentEnd);
    $metaRaw = trim(substr($output, $markerPos + strlen(self::CURL_META_MARKER)));

    // Expected shape: "<3-digit status>\t<content type, may be empty>\t<effective URL>".
    if (!preg_match('/^(\d{3})\t([^\t]*)\t(\S+)/', $metaRaw, $matches)) {
        return $this->encodeRemoteWebPageError(
            'Failed to parse metadata from cURL output.',
            $normalizedUrl
        );
    }

    $statusCode = (int) $matches[1];
    $contentType = $matches[2];
    $finalUrl = $matches[3];

    // curl writes http_code "000" when it never received an HTTP response
    // (DNS failure, refused connection, timeout, ...). Report that as an error
    // instead of a "successful" result with statusCode 0 and empty content.
    if ($statusCode === 0) {
        return $this->encodeRemoteWebPageError(
            'No HTTP response received from remote server.',
            $normalizedUrl
        );
    }

    // Byte-based cap; a multi-byte character cut in half is later substituted
    // by encodeJsonSafe() rather than breaking the JSON encode.
    $truncated = false;
    if (strlen($content) > self::REMOTE_WEB_PAGE_MAX_BYTES) {
        $content = substr($content, 0, self::REMOTE_WEB_PAGE_MAX_BYTES);
        $truncated = true;
    }

    return $this->encodeJsonSafe([
        'url' => $normalizedUrl,
        'finalUrl' => $finalUrl,
        'statusCode' => $statusCode,
        'contentType' => $contentType,
        'content' => $content,
        'truncated' => $truncated,
    ]);
}

/**
 * Build the curl command line used to fetch a remote web page.
 *
 * The page body goes to stdout, followed by a newline, CURL_META_MARKER and the
 * tab-separated metadata "<http_code>\t<content_type>\t<url_effective>".
 *
 * Hardening flags:
 *  - --globoff: stops curl from expanding "[...]" / "{...}" in the URL into
 *    several requests (which would also emit several metadata trailers).
 *  - --proto / --proto-redir: restrict both the initial request and any
 *    redirect followed via -L to http and https, matching the URL validation.
 *
 * @param string $url already validated absolute http/https URL
 *
 * @return string shell-safe command line
 */
private function buildFetchRemoteWebPageCommand(string $url): string
{
    $escapedUrl = escapeshellarg($url);
    // "\n" and "\t" are intentionally literal (single quotes): curl expands them in --write-out.
    $writeOutFormat = escapeshellarg('\n' . self::CURL_META_MARKER . '%{http_code}\t%{content_type}\t%{url_effective}');
    // Leading "=" means "allow exactly these protocols" in curl's --proto syntax.
    $allowedProtocols = escapeshellarg('=http,https');

    return sprintf(
        'curl -L -sS --globoff --proto %s --proto-redir %s --max-time %d --connect-timeout %d --output - --write-out %s %s',
        $allowedProtocols,
        $allowedProtocols,
        self::REMOTE_WEB_PAGE_TIMEOUT_SECONDS,
        self::REMOTE_WEB_PAGE_CONNECT_TIMEOUT_SECONDS,
        $writeOutFormat,
        $escapedUrl
    );
}

/**
 * Tell whether the given URL may be fetched as a remote web page.
 *
 * A URL is accepted when parse_url() yields both a scheme and a non-empty host
 * and the scheme is http or https (case-insensitive).
 */
private function isAllowedRemoteWebPageUrl(string $url): bool
{
    $components = parse_url($url);
    if (!is_array($components)) {
        return false;
    }

    $urlScheme = $components['scheme'] ?? null;
    $urlHost = $components['host'] ?? null;

    return is_string($urlScheme)
        && is_string($urlHost)
        && in_array(strtolower($urlScheme), ['http', 'https'], true)
        && $urlHost !== '';
}

/**
 * Build the JSON error document (keys: error, url) for a failed remote page fetch.
 */
private function encodeRemoteWebPageError(string $error, string $url): string
{
    $errorPayload = [
        'error' => $error,
        'url' => $url,
    ];

    return $this->encodeJsonSafe($errorPayload);
}

/**
 * Encode a payload as JSON, falling back to a fixed error document.
 *
 * Invalid UTF-8 sequences are substituted instead of failing the encode
 * (JSON_INVALID_UTF8_SUBSTITUTE). Should json_encode() still not return a
 * string, a static error JSON is returned.
 *
 * @param array<string, bool|int|string|null> $payload
 */
private function encodeJsonSafe(array $payload): string
{
    $encoded = json_encode($payload, JSON_INVALID_UTF8_SUBSTITUTE);

    return is_string($encoded)
        ? $encoded
        : '{"error":"Unable to encode JSON response.","url":""}';
}

public function runBuildInWorkspace(string $workspacePath, string $agentImage): string
{
return $this->dockerExecutor->run(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,13 @@ public function getRemoteAssetInfo(string $url): string;
*/
public function getWorkspaceRules(): string;

/**
 * Fetch the textual content of a remote web page via cURL.
 *
 * Returns JSON with keys: url, finalUrl, statusCode, contentType, content, truncated.
 * On failure, returns JSON with keys: error, url. Never throws.
 *
 * @param string $url Absolute http or https URL of the page to fetch.
 *
 * @return string JSON-encoded result (success or error shape as described above).
 */
public function fetchRemoteWebPage(string $url): string;

/**
* Run build (npm run build) in the specified workspace.
*
Expand Down
23 changes: 23 additions & 0 deletions tests/Unit/LlmContentEditor/ContentEditorAgentTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use NeuronAI\Chat\Messages\ToolCallResultMessage;
use NeuronAI\Chat\Messages\UserMessage;
use NeuronAI\Tools\Tool;
use NeuronAI\Tools\ToolInterface;
use PHPUnit\Framework\TestCase;
use ReflectionMethod;

Expand Down Expand Up @@ -183,6 +184,28 @@ public function testAgentUsesProvidedConfig(): void
self::assertSame(['Custom output'], $outputInstructions);
}

/**
 * The agent's tool list must expose the fetch_remote_web_page tool.
 */
public function testToolsContainFetchRemoteWebPageTool(): void
{
    $agent = new ContentEditorAgent(
        $this->createMockWorkspaceTooling(),
        LlmModelName::defaultForContentEditor(),
        'sk-test-key',
        $this->createDefaultAgentConfig()
    );

    // tools() is protected; reflection can invoke it directly. No setAccessible()
    // call is needed: it is a no-op since PHP 8.1 and deprecated in PHP 8.5.
    $toolsMethod = new ReflectionMethod(ContentEditorAgent::class, 'tools');

    /** @var list<ToolInterface> $tools */
    $tools = $toolsMethod->invoke($agent);

    $toolNames = array_map(
        static fn (ToolInterface $tool): string => $tool->getName(),
        $tools
    );

    self::assertContains('fetch_remote_web_page', $toolNames);
}

private function createMockWorkspaceTooling(): WorkspaceToolingServiceInterface
{
return $this->createMock(WorkspaceToolingServiceInterface::class);
Expand Down
13 changes: 13 additions & 0 deletions tests/Unit/LlmContentEditor/ProgressMessageResolverTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,19 @@ public function testToolCallingGetWorkspaceRulesTranslates(): void
self::assertSame('Loading workspace rules', $resolver->messageForEvent($event, 'en'));
}

/**
 * A tool_calling event for fetch_remote_web_page resolves to the translated
 * "fetching_remote_web_page" progress message.
 */
public function testToolCallingFetchRemoteWebPageTranslates(): void
{
    $translatorMock = $this->createMock(TranslatorInterface::class);
    $translatorMock
        ->method('trans')
        ->with('fetching_remote_web_page', [], 'progress', 'en')
        ->willReturn('Fetching remote web page');

    $progressMessage = $this->createResolver($translatorMock)->messageForEvent(
        new AgentEventDto('tool_calling', 'fetch_remote_web_page'),
        'en'
    );

    self::assertSame('Fetching remote web page', $progressMessage);
}

public function testInferenceStopReturnsNull(): void
{
$translator = $this->createMock(TranslatorInterface::class);
Expand Down
9 changes: 9 additions & 0 deletions tests/Unit/ProjectMgmt/AgentConfigTemplateTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ public function testDefaultTemplateContainsRemoteContentAssetsSection(): void
self::assertStringContainsString('get_remote_asset_info', $template->backgroundInstructions);
}

/**
 * The default template's background instructions must describe the
 * remote web page tool and its http/https restriction.
 */
public function testDefaultTemplateContainsRemoteWebPagesSection(): void
{
    $backgroundInstructions = AgentConfigTemplate::forProjectType(ProjectType::DEFAULT)->backgroundInstructions;

    $expectedFragments = ['REMOTE WEB PAGES', 'fetch_remote_web_page', 'http/https'];
    foreach ($expectedFragments as $fragment) {
        self::assertStringContainsString($fragment, $backgroundInstructions);
    }
}

public function testDefaultTemplateContainsWorkspaceRulesSection(): void
{
$template = AgentConfigTemplate::forProjectType(ProjectType::DEFAULT);
Expand Down
Loading