From dde2bea2cc377a51f6130114c1b80fce4851f958 Mon Sep 17 00:00:00 2001 From: Harry Date: Thu, 22 Jan 2026 23:36:32 +0800 Subject: [PATCH] fix(llm-skill): prompt tool call - Renamed `build_skill_artifact_set` to `build_skill_bundle` for improved clarity in asset management. - Updated references in `SkillManager` to reflect the new method name and ensure consistent handling of skill bundles. - Added `AppAssetsAttrsInitializer` to `SandboxManager` to enhance asset initialization processes. - Implemented output truncation in `SandboxBashTool` to manage long command outputs effectively. --- api/core/app_assets/paths.py | 2 +- api/core/sandbox/bash/bash_tool.py | 31 ++++++++++++++--- api/core/sandbox/builder.py | 34 ++++++++++++------- .../sandbox/initializer/skill_initializer.py | 7 ---- api/core/sandbox/manager.py | 4 +++ api/core/skill/skill_manager.py | 8 ++--- api/core/workflow/nodes/base/node.py | 2 +- api/core/workflow/nodes/llm/node.py | 10 ++++-- 8 files changed, 65 insertions(+), 33 deletions(-) diff --git a/api/core/app_assets/paths.py b/api/core/app_assets/paths.py index 2eceadb798..cdfbdcc923 100644 --- a/api/core/app_assets/paths.py +++ b/api/core/app_assets/paths.py @@ -14,7 +14,7 @@ class AssetPaths: return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/resolved/{node_id}" @staticmethod - def build_skill_artifact_set(tenant_id: str, app_id: str, assets_id: str) -> str: + def build_skill_bundle(tenant_id: str, app_id: str, assets_id: str) -> str: return f"{AssetPaths._BASE}/{tenant_id}/{app_id}/artifacts/{assets_id}/skill_artifact_set.json" @staticmethod diff --git a/api/core/sandbox/bash/bash_tool.py b/api/core/sandbox/bash/bash_tool.py index 6003c527c8..a2d55bf857 100644 --- a/api/core/sandbox/bash/bash_tool.py +++ b/api/core/sandbox/bash/bash_tool.py @@ -20,6 +20,27 @@ from ..utils.debug import sandbox_debug COMMAND_TIMEOUT_SECONDS = 60 * 60 +# Output truncation settings to avoid overwhelming model context +# 8000 chars ≈ 2000-2700 tokens, safe for models with 8K+ context +MAX_OUTPUT_LENGTH = 8000 +TRUNCATE_HEAD_LENGTH = 2500 # Keep beginning for context +TRUNCATE_TAIL_LENGTH = 2500 # Keep end for results/errors + + +def _truncate_output(output: str, name: str = "output") -> str: + """Truncate output if it exceeds the maximum length. + + Keeps the head and tail of the output to preserve context and final results. + """ + if len(output) <= MAX_OUTPUT_LENGTH: + return output + + omitted_length = len(output) - TRUNCATE_HEAD_LENGTH - TRUNCATE_TAIL_LENGTH + head = output[:TRUNCATE_HEAD_LENGTH] + tail = output[-TRUNCATE_TAIL_LENGTH:] + + return f"{head}\n\n... [{omitted_length} characters omitted from {name}] ...\n\n{tail}" + class SandboxBashTool(Tool): def __init__(self, sandbox: VirtualEnvironment, tenant_id: str, tools_path: str | None = None) -> None: @@ -35,7 +56,7 @@ class SandboxBashTool(Tool): ), parameters=[ ToolParameter.get_simple_instance( - name="command", + name="bash", llm_description="The bash command to execute in current working directory", typ=ToolParameter.ToolParameterType.STRING, required=True, @@ -65,7 +86,7 @@ class SandboxBashTool(Tool): app_id: str | None = None, message_id: str | None = None, ) -> Generator[ToolInvokeMessage, None, None]: - command = tool_parameters.get("command", "") + command = tool_parameters.get("bash", "") if not command: yield self.create_text_message("Error: No command provided") return @@ -92,14 +113,16 @@ class SandboxBashTool(Tool): stdout = result.stdout.decode("utf-8", errors="replace") if result.stdout else "" stderr = result.stderr.decode("utf-8", errors="replace") if result.stderr else "" - exit_code = result.exit_code + + # Truncate long outputs to avoid overwhelming the model + stdout = _truncate_output(stdout, "stdout") + stderr = _truncate_output(stderr, "stderr") output_parts: list[str] = [] if stdout: output_parts.append(f"\n{stdout}") if stderr: output_parts.append(f"\n{stderr}") - output_parts.append(f"\nCommand exited with code {exit_code}") yield self.create_text_message("\n".join(output_parts)) diff --git a/api/core/sandbox/builder.py b/api/core/sandbox/builder.py index 772f1d3ddc..28d8b75a18 100644 --- a/api/core/sandbox/builder.py +++ b/api/core/sandbox/builder.py @@ -5,6 +5,8 @@ import threading from collections.abc import Mapping, Sequence from typing import TYPE_CHECKING, Any +from flask import current_app + from core.entities.provider_entities import BasicProviderConfig from core.virtual_environment.__base.virtual_environment import VirtualEnvironment @@ -119,22 +121,28 @@ class SandboxBuilder: init.initialize(sandbox) # Run sandbox setup asynchronously so workflow execution can proceed. - def initialize() -> None: - try: - for init in self._initializers: - if not isinstance(init, AsyncSandboxInitializer): - continue + # Capture the Flask app before starting the thread for database access. + flask_app = current_app._get_current_object() # type: ignore + def initialize() -> None: + with flask_app.app_context(): + try: + for init in self._initializers: + if not isinstance(init, AsyncSandboxInitializer): + continue + + if sandbox.is_cancelled(): + return + init.initialize(sandbox) if sandbox.is_cancelled(): return - init.initialize(sandbox) - if sandbox.is_cancelled(): - return - sandbox.mount() - sandbox.mark_ready() - except Exception as exc: - logger.exception("Failed to initialize sandbox: tenant_id=%s, app_id=%s", self._tenant_id, self._app_id) - sandbox.mark_failed(exc) + sandbox.mount() + sandbox.mark_ready() + except Exception as exc: + logger.exception( + "Failed to initialize sandbox: tenant_id=%s, app_id=%s", self._tenant_id, self._app_id + ) + sandbox.mark_failed(exc) # Background init completes or signals failure via sandbox state. threading.Thread(target=initialize, daemon=True).start() diff --git a/api/core/sandbox/initializer/skill_initializer.py b/api/core/sandbox/initializer/skill_initializer.py index a5a0b94fe7..92375db78a 100644 --- a/api/core/sandbox/initializer/skill_initializer.py +++ b/api/core/sandbox/initializer/skill_initializer.py @@ -30,13 +30,6 @@ class SkillInitializer(SyncSandboxInitializer): self._app_id, self._assets_id, ) - if bundle is None: - raise ValueError( - f"No skill bundle found for tenant_id={self._tenant_id}, " - f"app_id={self._app_id}, " - f"assets_id={self._assets_id}" - ) - sandbox.attrs.set( SkillAttrs.BUNDLE, bundle, diff --git a/api/core/sandbox/manager.py b/api/core/sandbox/manager.py index d832b8b6e0..5a0881e46f 100644 --- a/api/core/sandbox/manager.py +++ b/api/core/sandbox/manager.py @@ -7,6 +7,7 @@ from typing import Final from core.sandbox.builder import SandboxBuilder from core.sandbox.entities import AppAssets, SandboxType from core.sandbox.entities.providers import SandboxProviderEntity +from core.sandbox.initializer.app_assets_attrs_loader import AppAssetsAttrsInitializer from core.sandbox.initializer.app_assets_initializer import AppAssetsInitializer from core.sandbox.initializer.dify_cli_initializer import DifyCliInitializer from core.sandbox.initializer.draft_app_assets_initializer import DraftAppAssetsInitializer @@ -123,6 +124,7 @@ class SandboxManager: .options(sandbox_provider.config) .user(user_id) .app(app_id) + .initializer(AppAssetsAttrsInitializer(tenant_id, app_id, assets.id)) .initializer(AppAssetsInitializer(tenant_id, app_id, assets.id)) .initializer(DifyCliInitializer(tenant_id, user_id, app_id, assets.id)) .initializer(SkillInitializer(tenant_id, user_id, app_id, assets.id)) @@ -160,6 +162,7 @@ class SandboxManager: .options(sandbox_provider.config) .user(user_id) .app(app_id) + .initializer(AppAssetsAttrsInitializer(tenant_id, app_id, assets.id)) .initializer(DraftAppAssetsInitializer(tenant_id, app_id, assets.id)) .initializer(DifyCliInitializer(tenant_id, user_id, app_id, assets.id)) .initializer(SkillInitializer(tenant_id, user_id, app_id, assets.id)) @@ -191,6 +194,7 @@ class SandboxManager: .options(sandbox_provider.config) .user(user_id) .app(app_id) + .initializer(AppAssetsAttrsInitializer(tenant_id, app_id, assets.id)) .initializer(AppAssetsInitializer(tenant_id, app_id, assets.id)) .initializer(DifyCliInitializer(tenant_id, user_id, app_id, assets.id)) .initializer(SkillInitializer(tenant_id, user_id, app_id, assets.id)) diff --git a/api/core/skill/skill_manager.py b/api/core/skill/skill_manager.py index ca5eb81864..8bb931646a 100644 --- a/api/core/skill/skill_manager.py +++ b/api/core/skill/skill_manager.py @@ -13,14 +13,14 @@ class SkillManager: tenant_id: str, app_id: str, assets_id: str, - ) -> SkillBundle | None: - key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id) + ) -> SkillBundle: + key = AssetPaths.build_skill_bundle(tenant_id, app_id, assets_id) try: data = storage.load_once(key) return SkillBundle.model_validate_json(data) except Exception: logger.info("Skill bundle missing or invalid for assets_id=%s", assets_id) - return None + return SkillBundle(assets_id=assets_id) @staticmethod def save_bundle( @@ -29,5 +29,5 @@ class SkillManager: assets_id: str, bundle: SkillBundle, ) -> None: - key = AssetPaths.build_skill_artifact_set(tenant_id, app_id, assets_id) + key = AssetPaths.build_skill_bundle(tenant_id, app_id, assets_id) storage.save(key, bundle.model_dump_json(indent=2).encode("utf-8")) diff --git a/api/core/workflow/nodes/base/node.py b/api/core/workflow/nodes/base/node.py index 53cfbba5ef..b0fafe0ba8 100644 --- a/api/core/workflow/nodes/base/node.py +++ b/api/core/workflow/nodes/base/node.py @@ -295,7 +295,7 @@ class Node(Generic[NodeDataT]): Nested nodes are nodes with parent_node_id == self._node_id. They are executed before the main node to extract values from list[PromptMessage]. """ - from core.workflow.nodes.node_factory import DifyNodeFactory + from core.app.workflow.node_factory import DifyNodeFactory extractor_configs = self._find_extractor_node_configs() logger.debug("[NestedNode] Found %d nested nodes for parent '%s'", len(extractor_configs), self._node_id) diff --git a/api/core/workflow/nodes/llm/node.py b/api/core/workflow/nodes/llm/node.py index 6951d1ad79..bf287575f3 100644 --- a/api/core/workflow/nodes/llm/node.py +++ b/api/core/workflow/nodes/llm/node.py @@ -1509,7 +1509,9 @@ class LLMNode(Node[LLMNodeData]): if bundle is not None and file_tree is not None: skill_entry = SkillCompiler().compile_one( bundle=bundle, - document=SkillDocument(skill_id="anonymous", content=result_text, metadata={}), + document=SkillDocument( + skill_id="anonymous", content=result_text, metadata=message.metadata or {} + ), file_tree=file_tree, base_path=AppAssets.PATH, ) @@ -1548,7 +1550,9 @@ class LLMNode(Node[LLMNodeData]): if plain_text and bundle is not None and file_tree is not None: skill_entry = SkillCompiler().compile_one( bundle=bundle, - document=SkillDocument(skill_id="anonymous", content=plain_text, metadata={}), + document=SkillDocument( + skill_id="anonymous", content=plain_text, metadata=message.metadata or {} + ), file_tree=file_tree, base_path=AppAssets.PATH, ) @@ -1821,7 +1825,7 @@ class LLMNode(Node[LLMNodeData]): if isinstance(prompt, LLMNodeChatModelMessage): skill_entry = SkillCompiler().compile_one( bundle=bundle, - document=SkillDocument(skill_id="anonymous", content=prompt.text, metadata={}), + document=SkillDocument(skill_id="anonymous", content=prompt.text, metadata=prompt.metadata or {}), file_tree=file_tree, base_path=AppAssets.PATH, )