dify/api/dify_graph/graph_engine/graph_state_manager.py

"""
Graph state manager that combines node, edge, and execution tracking.
"""

import threading
from collections import Counter
from collections.abc import Sequence
from typing import TypedDict, final

from dify_graph.enums import NodeState
from dify_graph.graph import Edge, Graph

from .ready_queue import ReadyQueue


class EdgeStateAnalysis(TypedDict):
    """Summary flags for a collection of edges, as built by GraphStateManager.analyze_edge_states."""

    # True when at least one analyzed edge is in NodeState.UNKNOWN.
    has_unknown: bool
    # True when at least one analyzed edge is in NodeState.TAKEN.
    has_taken: bool
    # True when every analyzed edge is NodeState.SKIPPED; also True when no edges were given.
    all_skipped: bool


@final
class GraphStateManager:
    """
    Single access point for node states, edge states, and execution tracking.

    The manager wraps a ``Graph`` and a ``ReadyQueue`` and keeps its own set of
    currently executing node IDs. Every method except ``get_queue_depth`` runs
    its body while holding one re-entrant lock (``threading.RLock``), so the
    read/update steps inside a single method are not interleaved with other
    calls made through the same manager.
    """

    def __init__(self, graph: Graph, ready_queue: ReadyQueue) -> None:
        """
        Initialize the state manager.

        Args:
            graph: The workflow graph whose node and edge states are managed
            ready_queue: Queue for nodes ready to execute
        """
        self._graph = graph
        self._ready_queue = ready_queue
        # Re-entrant so that a method holding the lock may call another locked method.
        self._lock = threading.RLock()

        # Execution tracking state
        self._executing_nodes: set[str] = set()

    # ============= Node State Operations =============

    def enqueue_node(self, node_id: str) -> None:
        """
        Mark a node as TAKEN and add it to the ready queue.

        The state transition and the enqueue happen under the same lock
        acquisition because they always occur together when preparing a node
        for execution.

        Args:
            node_id: The ID of the node to enqueue
        """
        with self._lock:
            self._graph.nodes[node_id].state = NodeState.TAKEN
            self._ready_queue.put(node_id)

    def mark_node_skipped(self, node_id: str) -> None:
        """
        Mark a node as SKIPPED.

        Args:
            node_id: The ID of the node to skip
        """
        with self._lock:
            self._graph.nodes[node_id].state = NodeState.SKIPPED

    def is_node_ready(self, node_id: str) -> bool:
        """
        Check if a node is ready to be executed.

        Readiness rules, in order:
        - A node without incoming edges is always ready.
        - A node with any incoming edge still UNKNOWN is not ready.
        - Otherwise the node is ready only if at least one incoming edge is TAKEN
          (i.e. a node whose incoming edges are all SKIPPED is not ready).

        Args:
            node_id: The ID of the node to check

        Returns:
            True if the node is ready for execution
        """
        with self._lock:
            incoming_edges = self._graph.get_incoming_edges(node_id)

            # If no incoming edges, node is always ready
            if not incoming_edges:
                return True

            # Single pass: an UNKNOWN edge vetoes readiness immediately,
            # otherwise remember whether any edge was TAKEN.
            has_taken = False
            for edge in incoming_edges:
                if edge.state == NodeState.UNKNOWN:
                    return False
                if edge.state == NodeState.TAKEN:
                    has_taken = True

            return has_taken

    def get_node_state(self, node_id: str) -> NodeState:
        """
        Get the current state of a node.

        Args:
            node_id: The ID of the node

        Returns:
            The current node state
        """
        with self._lock:
            return self._graph.nodes[node_id].state

    # ============= Edge State Operations =============

    def mark_edge_taken(self, edge_id: str) -> None:
        """
        Mark an edge as TAKEN.

        Args:
            edge_id: The ID of the edge to mark
        """
        with self._lock:
            self._graph.edges[edge_id].state = NodeState.TAKEN

    def mark_edge_skipped(self, edge_id: str) -> None:
        """
        Mark an edge as SKIPPED.

        Args:
            edge_id: The ID of the edge to mark
        """
        with self._lock:
            self._graph.edges[edge_id].state = NodeState.SKIPPED

    def analyze_edge_states(self, edges: Sequence[Edge]) -> EdgeStateAnalysis:
        """
        Analyze the states of edges and return summary flags.

        Args:
            edges: Edges to analyze (any sequence, e.g. the result of
                ``categorize_branch_edges``)

        Returns:
            Analysis result with state flags. For an empty input,
            ``has_unknown`` and ``has_taken`` are False and ``all_skipped`` is True.
        """
        with self._lock:
            states = {edge.state for edge in edges}

            return EdgeStateAnalysis(
                has_unknown=NodeState.UNKNOWN in states,
                has_taken=NodeState.TAKEN in states,
                # Vacuously true when there are no edges at all.
                all_skipped=not states or states == {NodeState.SKIPPED},
            )

    def get_edge_state(self, edge_id: str) -> NodeState:
        """
        Get the current state of an edge.

        Args:
            edge_id: The ID of the edge

        Returns:
            The current edge state
        """
        with self._lock:
            return self._graph.edges[edge_id].state

    def categorize_branch_edges(self, node_id: str, selected_handle: str) -> tuple[Sequence[Edge], Sequence[Edge]]:
        """
        Categorize the outgoing edges of a branch node into selected and unselected.

        An edge is selected when its ``source_handle`` equals ``selected_handle``;
        every other outgoing edge is unselected. The original edge order is kept
        within each group.

        Args:
            node_id: The ID of the branch node
            selected_handle: The handle of the selected edge

        Returns:
            A tuple of (selected_edges, unselected_edges)
        """
        with self._lock:
            selected_edges: list[Edge] = []
            unselected_edges: list[Edge] = []

            for edge in self._graph.get_outgoing_edges(node_id):
                bucket = selected_edges if edge.source_handle == selected_handle else unselected_edges
                bucket.append(edge)

            return selected_edges, unselected_edges

    # ============= Execution Tracking Operations =============

    def start_execution(self, node_id: str) -> None:
        """
        Mark a node as executing.

        Args:
            node_id: The ID of the node starting execution
        """
        with self._lock:
            self._executing_nodes.add(node_id)

    def finish_execution(self, node_id: str) -> None:
        """
        Mark a node as no longer executing.

        Finishing a node that is not tracked as executing is a no-op.

        Args:
            node_id: The ID of the node finishing execution
        """
        with self._lock:
            self._executing_nodes.discard(node_id)

    def is_executing(self, node_id: str) -> bool:
        """
        Check if a node is currently executing.

        Args:
            node_id: The ID of the node to check

        Returns:
            True if the node is executing
        """
        with self._lock:
            return node_id in self._executing_nodes

    def get_executing_count(self) -> int:
        """
        Get the count of currently executing nodes.

        Returns:
            Number of executing nodes
        """
        # This count is a best-effort snapshot and can change concurrently.
        # Only use it for pause-drain checks where scheduling is already frozen.
        with self._lock:
            return len(self._executing_nodes)

    def get_executing_nodes(self) -> set[str]:
        """
        Get a copy of the set of executing node IDs.

        The returned set is independent of the internal one, so mutating it
        does not affect the manager.

        Returns:
            Set of node IDs currently executing
        """
        with self._lock:
            return self._executing_nodes.copy()

    def clear_executing(self) -> None:
        """Clear all executing nodes."""
        with self._lock:
            self._executing_nodes.clear()

    # ============= Composite Operations =============

    def is_execution_complete(self) -> bool:
        """
        Check if graph execution is complete.

        Execution is complete when:
        - Ready queue is empty
        - No nodes are executing

        Returns:
            True if execution is complete
        """
        with self._lock:
            return self._ready_queue.empty() and not self._executing_nodes

    def get_queue_depth(self) -> int:
        """
        Get the current depth of the ready queue.

        This delegates straight to the queue's ``qsize()`` and, unlike the other
        methods, does not take the manager's lock.

        Returns:
            Number of nodes in the ready queue
        """
        return self._ready_queue.qsize()

    def get_execution_stats(self) -> dict[str, int]:
        """
        Get execution statistics.

        Returns:
            Dictionary with the keys ``queue_depth``, ``executing``,
            ``taken_nodes``, ``skipped_nodes`` and ``unknown_nodes``
        """
        with self._lock:
            # Tally all node states in one pass; Counter yields 0 for states that never occur.
            state_counts = Counter(node.state for node in self._graph.nodes.values())

            return {
                "queue_depth": self._ready_queue.qsize(),
                "executing": len(self._executing_nodes),
                "taken_nodes": state_counts[NodeState.TAKEN],
                "skipped_nodes": state_counts[NodeState.SKIPPED],
                "unknown_nodes": state_counts[NodeState.UNKNOWN],
            }
feat: knowledge pipeline (#25360) Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com> 2025-09-18 04:49:10 +00:00			`"""`
			`Graph state manager that combines node, edge, and execution tracking.`
			`"""`

			`import threading`
			`from collections.abc import Sequence`
			`from typing import TypedDict, final`

refactor: move workflow package to dify_graph (#32844) 2026-03-02 10:42:30 +00:00			`from dify_graph.enums import NodeState`
			`from dify_graph.graph import Edge, Graph`
feat: knowledge pipeline (#25360) Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com> 2025-09-18 04:49:10 +00:00
			`from .ready_queue import ReadyQueue`


			`class EdgeStateAnalysis(TypedDict):`
			`"""Analysis result for edge states."""`

			`has_unknown: bool`
			`has_taken: bool`
			`all_skipped: bool`


			`@final`
			`class GraphStateManager:`
			`def __init__(self, graph: Graph, ready_queue: ReadyQueue) -> None:`
			`"""`
			`Initialize the state manager.`

			`Args:`
			`graph: The workflow graph`
			`ready_queue: Queue for nodes ready to execute`
			`"""`
			`self._graph = graph`
			`self._ready_queue = ready_queue`
			`self._lock = threading.RLock()`

			`# Execution tracking state`
			`self._executing_nodes: set[str] = set()`

			`# ============= Node State Operations =============`

			`def enqueue_node(self, node_id: str) -> None:`
			`"""`
			`Mark a node as TAKEN and add it to the ready queue.`

			`This combines the state transition and enqueueing operations`
			`that always occur together when preparing a node for execution.`

			`Args:`
			`node_id: The ID of the node to enqueue`
			`"""`
			`with self._lock:`
			`self._graph.nodes[node_id].state = NodeState.TAKEN`
			`self._ready_queue.put(node_id)`

			`def mark_node_skipped(self, node_id: str) -> None:`
			`"""`
			`Mark a node as SKIPPED.`

			`Args:`
			`node_id: The ID of the node to skip`
			`"""`
			`with self._lock:`
			`self._graph.nodes[node_id].state = NodeState.SKIPPED`

			`def is_node_ready(self, node_id: str) -> bool:`
			`"""`
			`Check if a node is ready to be executed.`

			`A node is ready when all its incoming edges from taken branches`
			`have been satisfied.`

			`Args:`
			`node_id: The ID of the node to check`

			`Returns:`
			`True if the node is ready for execution`
			`"""`
			`with self._lock:`
			`# Get all incoming edges to this node`
			`incoming_edges = self._graph.get_incoming_edges(node_id)`

			`# If no incoming edges, node is always ready`
			`if not incoming_edges:`
			`return True`

			`# If any edge is UNKNOWN, node is not ready`
			`if any(edge.state == NodeState.UNKNOWN for edge in incoming_edges):`
			`return False`

			`# Node is ready if at least one edge is TAKEN`
			`return any(edge.state == NodeState.TAKEN for edge in incoming_edges)`

			`def get_node_state(self, node_id: str) -> NodeState:`
			`"""`
			`Get the current state of a node.`

			`Args:`
			`node_id: The ID of the node`

			`Returns:`
			`The current node state`
			`"""`
			`with self._lock:`
			`return self._graph.nodes[node_id].state`

			`# ============= Edge State Operations =============`

			`def mark_edge_taken(self, edge_id: str) -> None:`
			`"""`
			`Mark an edge as TAKEN.`

			`Args:`
			`edge_id: The ID of the edge to mark`
			`"""`
			`with self._lock:`
			`self._graph.edges[edge_id].state = NodeState.TAKEN`

			`def mark_edge_skipped(self, edge_id: str) -> None:`
			`"""`
			`Mark an edge as SKIPPED.`

			`Args:`
			`edge_id: The ID of the edge to mark`
			`"""`
			`with self._lock:`
			`self._graph.edges[edge_id].state = NodeState.SKIPPED`

			`def analyze_edge_states(self, edges: list[Edge]) -> EdgeStateAnalysis:`
			`"""`
			`Analyze the states of edges and return summary flags.`

			`Args:`
			`edges: List of edges to analyze`

			`Returns:`
			`Analysis result with state flags`
			`"""`
			`with self._lock:`
			`states = {edge.state for edge in edges}`

			`return EdgeStateAnalysis(`
			`has_unknown=NodeState.UNKNOWN in states,`
			`has_taken=NodeState.TAKEN in states,`
			`all_skipped=states == {NodeState.SKIPPED} if states else True,`
			`)`

			`def get_edge_state(self, edge_id: str) -> NodeState:`
			`"""`
			`Get the current state of an edge.`

			`Args:`
			`edge_id: The ID of the edge`

			`Returns:`
			`The current edge state`
			`"""`
			`with self._lock:`
			`return self._graph.edges[edge_id].state`

			`def categorize_branch_edges(self, node_id: str, selected_handle: str) -> tuple[Sequence[Edge], Sequence[Edge]]:`
			`"""`
			`Categorize branch edges into selected and unselected.`

			`Args:`
			`node_id: The ID of the branch node`
			`selected_handle: The handle of the selected edge`

			`Returns:`
			`A tuple of (selected_edges, unselected_edges)`
			`"""`
			`with self._lock:`
			`outgoing_edges = self._graph.get_outgoing_edges(node_id)`
			`selected_edges: list[Edge] = []`
			`unselected_edges: list[Edge] = []`

			`for edge in outgoing_edges:`
			`if edge.source_handle == selected_handle:`
			`selected_edges.append(edge)`
			`else:`
			`unselected_edges.append(edge)`

			`return selected_edges, unselected_edges`

			`# ============= Execution Tracking Operations =============`

			`def start_execution(self, node_id: str) -> None:`
			`"""`
			`Mark a node as executing.`

			`Args:`
			`node_id: The ID of the node starting execution`
			`"""`
			`with self._lock:`
			`self._executing_nodes.add(node_id)`

			`def finish_execution(self, node_id: str) -> None:`
			`"""`
			`Mark a node as no longer executing.`

			`Args:`
			`node_id: The ID of the node finishing execution`
			`"""`
			`with self._lock:`
			`self._executing_nodes.discard(node_id)`

			`def is_executing(self, node_id: str) -> bool:`
			`"""`
			`Check if a node is currently executing.`

			`Args:`
			`node_id: The ID of the node to check`

			`Returns:`
			`True if the node is executing`
			`"""`
			`with self._lock:`
			`return node_id in self._executing_nodes`

			`def get_executing_count(self) -> int:`
			`"""`
			`Get the count of currently executing nodes.`

			`Returns:`
			`Number of executing nodes`
			`"""`
feat: Human Input Node (#32060) The frontend and backend implementation for the human input node. Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: yyh <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zhsama <torvalds@linux.do> 2026-02-09 06:57:23 +00:00			`# This count is a best-effort snapshot and can change concurrently.`
			`# Only use it for pause-drain checks where scheduling is already frozen.`
feat: knowledge pipeline (#25360) Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com> 2025-09-18 04:49:10 +00:00			`with self._lock:`
			`return len(self._executing_nodes)`

			`def get_executing_nodes(self) -> set[str]:`
			`"""`
			`Get a copy of the set of executing node IDs.`

			`Returns:`
			`Set of node IDs currently executing`
			`"""`
			`with self._lock:`
			`return self._executing_nodes.copy()`

			`def clear_executing(self) -> None:`
			`"""Clear all executing nodes."""`
			`with self._lock:`
			`self._executing_nodes.clear()`

			`# ============= Composite Operations =============`

			`def is_execution_complete(self) -> bool:`
			`"""`
			`Check if graph execution is complete.`

			`Execution is complete when:`
			`- Ready queue is empty`
			`- No nodes are executing`

			`Returns:`
			`True if execution is complete`
			`"""`
			`with self._lock:`
			`return self._ready_queue.empty() and len(self._executing_nodes) == 0`

			`def get_queue_depth(self) -> int:`
			`"""`
			`Get the current depth of the ready queue.`

			`Returns:`
			`Number of nodes in the ready queue`
			`"""`
			`return self._ready_queue.qsize()`

			`def get_execution_stats(self) -> dict[str, int]:`
			`"""`
			`Get execution statistics.`

			`Returns:`
			`Dictionary with execution statistics`
			`"""`
			`with self._lock:`
			`taken_nodes = sum(1 for node in self._graph.nodes.values() if node.state == NodeState.TAKEN)`
			`skipped_nodes = sum(1 for node in self._graph.nodes.values() if node.state == NodeState.SKIPPED)`
			`unknown_nodes = sum(1 for node in self._graph.nodes.values() if node.state == NodeState.UNKNOWN)`

			`return {`
			`"queue_depth": self._ready_queue.qsize(),`
			`"executing": len(self._executing_nodes),`
			`"taken_nodes": taken_nodes,`
			`"skipped_nodes": skipped_nodes,`
			`"unknown_nodes": unknown_nodes,`
			`}`