diff --git a/.agents/skills/component-refactoring/SKILL.md b/.agents/skills/component-refactoring/SKILL.md
index 140e0ef434..0ed18d71d1 100644
--- a/.agents/skills/component-refactoring/SKILL.md
+++ b/.agents/skills/component-refactoring/SKILL.md
@@ -187,53 +187,12 @@ const Template = useMemo(() => {
**When**: Component directly handles API calls, data transformation, or complex async operations.
-**Dify Convention**: Use `@tanstack/react-query` hooks from `web/service/use-*.ts` or create custom data hooks.
-
-```typescript
-// ❌ Before: API logic in component
-const MCPServiceCard = () => {
- const [basicAppConfig, setBasicAppConfig] = useState({})
-
- useEffect(() => {
- if (isBasicApp && appId) {
- (async () => {
- const res = await fetchAppDetail({ url: '/apps', id: appId })
- setBasicAppConfig(res?.model_config || {})
- })()
- }
- }, [appId, isBasicApp])
-
- // More API-related logic...
-}
-
-// ✅ After: Extract to data hook using React Query
-// use-app-config.ts
-import { useQuery } from '@tanstack/react-query'
-import { get } from '@/service/base'
-
-const NAME_SPACE = 'appConfig'
-
-export const useAppConfig = (appId: string, isBasicApp: boolean) => {
- return useQuery({
- enabled: isBasicApp && !!appId,
- queryKey: [NAME_SPACE, 'detail', appId],
- queryFn: () => getFile-level coverage (click to expand)
"
- echo ""
- echo '```'
- uv run --project api coverage report -m
- echo '```'
- echo "File coverage (lowest lines first)
');
- console.log('');
- console.log('```');
- fileSummaries
- .sort((a, b) => (a.pct - b.pct) || (b.lines.total - a.lines.total))
- .slice(0, 25)
- .forEach(({ file, pct, lines }) => {
- console.log(`${pct.toFixed(2)}%\t${lines.covered}/${lines.total}\t${file}`);
- });
- console.log('```');
- console.log('Vitest coverage table
');
- console.log('');
- console.log(headerRow);
- console.log(dividerRow);
- rowsForOutput.forEach((row) => console.log(formatRow(row)));
- console.log('
Hello
", encoding="utf-8") + + extractor = HtmlExtractor(str(file_path)) + docs = extractor.extract() + + assert len(docs) == 1 + assert "".join(docs[0].page_content.split()) == "TitleHello" + + def test_load_as_text_strips_whitespace_and_handles_empty(self, tmp_path): + file_path = tmp_path / "sample.html" + file_path.write_text(" \n ", encoding="utf-8") + + extractor = HtmlExtractor(str(file_path)) + + assert extractor._load_as_text() == "" diff --git a/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py new file mode 100644 index 0000000000..0b4c9bd809 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_jina_reader_extractor.py @@ -0,0 +1,47 @@ +from pytest_mock import MockerFixture + +from core.rag.extractor.jina_reader_extractor import JinaReaderWebExtractor + + +class TestJinaReaderWebExtractor: + def test_extract_crawl_mode_returns_document(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value={ + "content": "markdown-content", + "url": "https://example.com", + "description": "desc", + "title": "title", + }, + ) + + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "markdown-content" + assert docs[0].metadata == { + "source_url": "https://example.com", + "description": "desc", + "title": "title", + } + + def test_extract_crawl_mode_with_missing_data_returns_empty(self, mocker: MockerFixture): + mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value=None, + ) + + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + + assert extractor.extract() == [] + + def test_extract_non_crawl_mode_returns_empty(self, mocker: MockerFixture): + mock_get_crawl = mocker.patch( + "core.rag.extractor.jina_reader_extractor.WebsiteService.get_crawl_url_data", + return_value={"content": "unused"}, + ) + extractor = JinaReaderWebExtractor("https://example.com", "job-1", "tenant-1", mode="scrape") + + assert extractor.extract() == [] + mock_get_crawl.assert_not_called() diff --git a/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py index d4cf534c56..7e78c86c7d 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_markdown_extractor.py @@ -1,8 +1,15 @@ +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.markdown_extractor as markdown_module from core.rag.extractor.markdown_extractor import MarkdownExtractor -def test_markdown_to_tups(): - markdown = """ +class TestMarkdownExtractor: + def test_markdown_to_tups(self): + markdown = """ this is some text without header # title 1 @@ -11,12 +18,113 @@ this is balabala text ## title 2 this is more specific text. """ - extractor = MarkdownExtractor(file_path="dummy_path") - updated_output = extractor.markdown_to_tups(markdown) - assert len(updated_output) == 3 - key, header_value = updated_output[0] - assert key == None - assert header_value.strip() == "this is some text without header" - title_1, value = updated_output[1] - assert title_1.strip() == "title 1" - assert value.strip() == "this is balabala text" + extractor = MarkdownExtractor(file_path="dummy_path") + updated_output = extractor.markdown_to_tups(markdown) + + assert len(updated_output) == 3 + key, header_value = updated_output[0] + assert key is None + assert header_value.strip() == "this is some text without header" + + title_1, value = updated_output[1] + assert title_1.strip() == "title 1" + assert value.strip() == "this is balabala text" + + def test_markdown_to_tups_keeps_code_block_headers_literal(self): + markdown = """# Header +before +```python +# this is not a heading +print('x') +``` +after +""" + extractor = MarkdownExtractor(file_path="dummy_path") + + tups = extractor.markdown_to_tups(markdown) + + assert len(tups) == 2 + assert tups[1][0] == "Header" + assert "# this is not a heading" in tups[1][1] + + def test_remove_images_and_hyperlinks(self): + extractor = MarkdownExtractor(file_path="dummy_path") + + with_images = "before ![[image.png]] after" + with_links = "[OpenAI](https://openai.com)" + + assert extractor.remove_images(with_images) == "before after" + assert extractor.remove_hyperlinks(with_links) == "OpenAI" + + def test_parse_tups_reads_file_and_applies_options(self, tmp_path): + markdown_file = tmp_path / "doc.md" + markdown_file.write_text("# Header\nText with [link](https://example.com) and ![[img.png]]", encoding="utf-8") + + extractor = MarkdownExtractor( + file_path=str(markdown_file), + remove_hyperlinks=True, + remove_images=True, + autodetect_encoding=False, + ) + + tups = extractor.parse_tups(str(markdown_file)) + + assert len(tups) == 2 + assert tups[1][0] == "Header" + assert "[link]" not in tups[1][1] + assert "img.png" not in tups[1][1] + + def test_parse_tups_autodetects_encoding_after_decode_error(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path", autodetect_encoding=True) + + calls: list[str | None] = [] + + def fake_read_text(self, encoding=None): + calls.append(encoding) + if encoding is None: + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + if encoding == "bad-encoding": + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + return "# H\ncontent" + + monkeypatch.setattr(Path, "read_text", fake_read_text, raising=True) + monkeypatch.setattr( + markdown_module, + "detect_file_encodings", + lambda _: [SimpleNamespace(encoding="bad-encoding"), SimpleNamespace(encoding="utf-8")], + ) + + tups = extractor.parse_tups("dummy_path") + + assert len(tups) == 2 + assert calls == [None, "bad-encoding", "utf-8"] + + def test_parse_tups_decode_error_with_autodetect_disabled_raises(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path", autodetect_encoding=False) + + def raise_decode(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "fail") + + monkeypatch.setattr(Path, "read_text", raise_decode, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy_path"): + extractor.parse_tups("dummy_path") + + def test_parse_tups_other_exceptions_are_wrapped(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path") + + def raise_other(self, encoding=None): + raise OSError("disk error") + + monkeypatch.setattr(Path, "read_text", raise_other, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy_path"): + extractor.parse_tups("dummy_path") + + def test_extract_builds_documents_for_header_and_non_header(self, monkeypatch): + extractor = MarkdownExtractor(file_path="dummy_path") + monkeypatch.setattr(extractor, "parse_tups", lambda _: [(None, "plain"), ("Header", "value")]) + + docs = extractor.extract() + + assert [doc.page_content for doc in docs] == ["plain", "\n\nHeader\nvalue"] diff --git a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py index 58bec7d19e..6daee11f8f 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_notion_extractor.py @@ -1,93 +1,499 @@ +from types import SimpleNamespace from unittest import mock +import httpx +import pytest from pytest_mock import MockerFixture from core.rag.extractor import notion_extractor -user_id = "user1" -database_id = "database1" -page_id = "page1" - -extractor = notion_extractor.NotionExtractor( - notion_workspace_id="x", notion_obj_id="x", notion_page_type="page", tenant_id="x", notion_access_token="x" -) - - -def _generate_page(page_title: str): - return { - "object": "page", - "id": page_id, - "properties": { - "Page": { - "type": "title", - "title": [{"type": "text", "text": {"content": page_title}, "plain_text": page_title}], - } - }, - } - - -def _generate_block(block_id: str, block_type: str, block_text: str): - return { - "object": "block", - "id": block_id, - "parent": {"type": "page_id", "page_id": page_id}, - "type": block_type, - "has_children": False, - block_type: { - "rich_text": [ - { - "type": "text", - "text": {"content": block_text}, - "plain_text": block_text, - } - ] - }, - } - - -def _mock_response(data): +def _mock_response(data, status_code: int = 200, text: str = ""): response = mock.Mock() - response.status_code = 200 + response.status_code = status_code + response.text = text response.json.return_value = data return response -def _remove_multiple_new_lines(text): - while "\n\n" in text: - text = text.replace("\n\n", "\n") - return text.strip() +class TestNotionExtractorInitAndPublicMethods: + def test_init_with_explicit_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + assert extractor._notion_access_token == "token" + + def test_init_falls_back_to_env_token_when_credential_lookup_fails(self, monkeypatch): + monkeypatch.setattr( + notion_extractor.NotionExtractor, + "_get_access_token", + classmethod(lambda cls, tenant_id, credential_id: (_ for _ in ()).throw(Exception("credential error"))), + ) + monkeypatch.setattr(notion_extractor.dify_config, "NOTION_INTEGRATION_TOKEN", "env-token", raising=False) + + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + credential_id="cred", + ) + + assert extractor._notion_access_token == "env-token" + + def test_init_raises_if_no_credential_and_no_env_token(self, monkeypatch): + monkeypatch.setattr( + notion_extractor.NotionExtractor, + "_get_access_token", + classmethod(lambda cls, tenant_id, credential_id: (_ for _ in ()).throw(Exception("credential error"))), + ) + monkeypatch.setattr(notion_extractor.dify_config, "NOTION_INTEGRATION_TOKEN", None, raising=False) + + with pytest.raises(ValueError, match="Must specify `integration_token`"): + notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + credential_id="cred", + ) + + def test_extract_updates_last_edited_and_loads_documents(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + update_mock = mock.Mock() + load_mock = mock.Mock(return_value=[SimpleNamespace(page_content="doc")]) + monkeypatch.setattr(extractor, "update_last_edited_time", update_mock) + monkeypatch.setattr(extractor, "_load_data_as_documents", load_mock) + + docs = extractor.extract() + + update_mock.assert_called_once_with(None) + load_mock.assert_called_once_with("obj", "page") + assert len(docs) == 1 + + def test_load_data_as_documents_page_database_and_invalid(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + monkeypatch.setattr(extractor, "_get_notion_block_data", lambda _: ["line1", "line2"]) + page_docs = extractor._load_data_as_documents("page-id", "page") + assert page_docs[0].page_content == "line1\nline2" + + monkeypatch.setattr(extractor, "_get_notion_database_data", lambda _: [SimpleNamespace(page_content="db")]) + db_docs = extractor._load_data_as_documents("db-id", "database") + assert db_docs[0].page_content == "db" + + with pytest.raises(ValueError, match="notion page type not supported"): + extractor._load_data_as_documents("obj", "unsupported") -def test_notion_page(mocker: MockerFixture): - texts = ["Head 1", "1.1", "paragraph 1", "1.1.1"] - mocked_notion_page = { - "object": "list", - "results": [ - _generate_block("b1", "heading_1", texts[0]), - _generate_block("b2", "heading_2", texts[1]), - _generate_block("b3", "paragraph", texts[2]), - _generate_block("b4", "heading_3", texts[3]), - ], - "next_cursor": None, - } - mocker.patch("httpx.request", return_value=_mock_response(mocked_notion_page)) +class TestNotionDatabase: + def test_get_notion_database_data_parses_property_types_and_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) - page_docs = extractor._load_data_as_documents(page_id, "page") - assert len(page_docs) == 1 - content = _remove_multiple_new_lines(page_docs[0].page_content) - assert content == "# Head 1\n## 1.1\nparagraph 1\n### 1.1.1" + first_page = { + "results": [ + { + "properties": { + "tags": { + "type": "multi_select", + "multi_select": [{"name": "A"}, {"name": "B"}], + }, + "title_prop": {"type": "title", "title": [{"plain_text": "Title"}]}, + "empty_title": {"type": "title", "title": []}, + "rich": {"type": "rich_text", "rich_text": [{"plain_text": "RichText"}]}, + "empty_rich": {"type": "rich_text", "rich_text": []}, + "select_prop": {"type": "select", "select": {"name": "Selected"}}, + "empty_select": {"type": "select", "select": None}, + "status_prop": {"type": "status", "status": {"name": "Open"}}, + "empty_status": {"type": "status", "status": None}, + "number_prop": {"type": "number", "number": 10}, + "dict_prop": {"type": "date", "date": {"start": "2024-01-01", "end": None}}, + }, + "url": "https://notion.so/page-1", + } + ], + "has_more": True, + "next_cursor": "cursor-2", + } + second_page = {"results": [], "has_more": False, "next_cursor": None} + + mock_post = mocker.patch("httpx.post", side_effect=[_mock_response(first_page), _mock_response(second_page)]) + + docs = extractor._get_notion_database_data("db-1", query_dict={"filter": {"x": 1}}) + + assert len(docs) == 1 + content = docs[0].page_content + assert "tags:['A', 'B']" in content + assert "title_prop:Title" in content + assert "rich:RichText" in content + assert "number_prop:10" in content + assert "dict_prop:start:2024-01-01" in content + assert "Row Page URL:https://notion.so/page-1" in content + assert mock_post.call_count == 2 + + def test_get_notion_database_data_handles_missing_results_and_empty_content(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + + mocker.patch("httpx.post", return_value=_mock_response({"results": None})) + assert extractor._get_notion_database_data("db-1") == [] + + def test_get_notion_database_data_requires_access_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + extractor._notion_access_token = None + + with pytest.raises(AssertionError, match="Notion access token is required"): + extractor._get_notion_database_data("db-1") -def test_notion_database(mocker: MockerFixture): - page_title_list = ["page1", "page2", "page3"] - mocked_notion_database = { - "object": "list", - "results": [_generate_page(i) for i in page_title_list], - "next_cursor": None, - } - mocker.patch("httpx.post", return_value=_mock_response(mocked_notion_database)) - database_docs = extractor._load_data_as_documents(database_id, "database") - assert len(database_docs) == 1 - content = _remove_multiple_new_lines(database_docs[0].page_content) - assert content == "\n".join([f"Page:{i}" for i in page_title_list]) +class TestNotionBlocks: + def test_get_notion_block_data_success_with_table_headings_children_and_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + first_response = { + "results": [ + {"type": "table", "id": "tbl-1", "has_children": False, "table": {}}, + { + "type": "heading_1", + "id": "h1", + "has_children": False, + "heading_1": {"rich_text": [{"text": {"content": "Heading"}}]}, + }, + { + "type": "paragraph", + "id": "p1", + "has_children": True, + "paragraph": {"rich_text": [{"text": {"content": "Paragraph"}}]}, + }, + { + "type": "child_page", + "id": "cp1", + "has_children": True, + "child_page": {"rich_text": []}, + }, + ], + "next_cursor": "cursor-2", + } + second_response = { + "results": [ + { + "type": "heading_2", + "id": "h2", + "has_children": False, + "heading_2": {"rich_text": [{"text": {"content": "SubHeading"}}]}, + } + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(first_response), _mock_response(second_response)]) + mocker.patch.object(extractor, "_read_table_rows", return_value="TABLE") + mocker.patch.object(extractor, "_read_block", return_value="CHILD") + + lines = extractor._get_notion_block_data("page-1") + + assert lines[0] == "TABLE\n\n" + assert "# Heading" in lines[1] + assert "Paragraph\nCHILD\n\n" in lines[2] + assert "## SubHeading" in lines[-1] + + def test_get_notion_block_data_handles_http_error_and_invalid_payload(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + mocker.patch("httpx.request", side_effect=httpx.HTTPError("network")) + with pytest.raises(ValueError, match="Error fetching Notion block data"): + extractor._get_notion_block_data("page-1") + + mocker.patch("httpx.request", return_value=_mock_response({"bad": "payload"}, status_code=200)) + with pytest.raises(ValueError, match="Error fetching Notion block data"): + extractor._get_notion_block_data("page-1") + + mocker.patch("httpx.request", return_value=_mock_response({"results": []}, status_code=500, text="boom")) + with pytest.raises(ValueError, match="Error fetching Notion block data: boom"): + extractor._get_notion_block_data("page-1") + + def test_read_block_supports_heading_table_and_recursion(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + root_payload = { + "results": [ + { + "type": "heading_2", + "id": "h2", + "has_children": False, + "heading_2": {"rich_text": [{"text": {"content": "Root"}}]}, + }, + { + "type": "paragraph", + "id": "child-block", + "has_children": True, + "paragraph": {"rich_text": [{"text": {"content": "Parent"}}]}, + }, + {"type": "table", "id": "tbl-1", "has_children": False, "table": {}}, + ], + "next_cursor": None, + } + child_payload = { + "results": [ + { + "type": "paragraph", + "id": "leaf", + "has_children": False, + "paragraph": {"rich_text": [{"text": {"content": "Child"}}]}, + } + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(root_payload), _mock_response(child_payload)]) + mocker.patch.object(extractor, "_read_table_rows", return_value="TABLE-MD") + + content = extractor._read_block("root") + + assert "## Root" in content + assert "Parent" in content + assert "Child" in content + assert "TABLE-MD" in content + + def test_read_block_breaks_on_missing_results(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + mocker.patch("httpx.request", return_value=_mock_response({"results": None, "next_cursor": None})) + + assert extractor._read_block("root") == "" + + def test_read_table_rows_formats_markdown_with_pagination(self, mocker: MockerFixture): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + page_one = { + "results": [ + { + "table_row": { + "cells": [ + [{"text": {"content": "H1"}}], + [{"text": {"content": "H2"}}], + ] + } + }, + { + "table_row": { + "cells": [ + [{"text": {"content": "R1C1"}}], + [{"text": {"content": "R1C2"}}], + ] + } + }, + ], + "next_cursor": "next", + } + page_two = { + "results": [ + { + "table_row": { + "cells": [ + [{"text": {"content": "H1"}}], + [], + ] + } + }, + { + "table_row": { + "cells": [ + [{"text": {"content": "R2C1"}}], + [{"text": {"content": "R2C2"}}], + ] + } + }, + ], + "next_cursor": None, + } + + mocker.patch("httpx.request", side_effect=[_mock_response(page_one), _mock_response(page_two)]) + + markdown = extractor._read_table_rows("tbl-1") + + assert "| H1 | H2 |" in markdown + assert "| R1C1 | R1C2 |" in markdown + assert "| H1 | |" in markdown + assert "| R2C1 | R2C2 |" in markdown + + +class TestNotionMetadataAndCredentialMethods: + def test_update_last_edited_time_no_document_model(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + assert extractor.update_last_edited_time(None) is None + + def test_update_last_edited_time_updates_document_and_commits(self, monkeypatch): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + + class FakeDocumentModel: + data_source_info = "data_source_info" + + update_calls = [] + + class FakeQuery: + def filter_by(self, **kwargs): + return self + + def update(self, payload): + update_calls.append(payload) + + class FakeSession: + committed = False + + def query(self, model): + assert model is FakeDocumentModel + return FakeQuery() + + def commit(self): + self.committed = True + + fake_db = SimpleNamespace(session=FakeSession()) + monkeypatch.setattr(notion_extractor, "DocumentModel", FakeDocumentModel) + monkeypatch.setattr(notion_extractor, "db", fake_db) + monkeypatch.setattr(extractor, "get_notion_last_edited_time", lambda: "2026-01-01T00:00:00.000Z") + + doc_model = SimpleNamespace(id="doc-1", data_source_info_dict={"source": "notion"}) + extractor.update_last_edited_time(doc_model) + + assert update_calls + assert fake_db.session.committed is True + + def test_get_notion_last_edited_time_uses_page_and_database_urls(self, mocker: MockerFixture): + extractor_page = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="page-id", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + request_mock = mocker.patch( + "httpx.request", return_value=_mock_response({"last_edited_time": "2025-05-01T00:00:00.000Z"}) + ) + + assert extractor_page.get_notion_last_edited_time() == "2025-05-01T00:00:00.000Z" + assert "pages/page-id" in request_mock.call_args[0][1] + + extractor_db = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="db-id", + notion_page_type="database", + tenant_id="tenant", + notion_access_token="token", + ) + request_mock = mocker.patch( + "httpx.request", return_value=_mock_response({"last_edited_time": "2025-06-01T00:00:00.000Z"}) + ) + + assert extractor_db.get_notion_last_edited_time() == "2025-06-01T00:00:00.000Z" + assert "databases/db-id" in request_mock.call_args[0][1] + + def test_get_notion_last_edited_time_requires_access_token(self): + extractor = notion_extractor.NotionExtractor( + notion_workspace_id="ws", + notion_obj_id="obj", + notion_page_type="page", + tenant_id="tenant", + notion_access_token="token", + ) + extractor._notion_access_token = None + + with pytest.raises(AssertionError, match="Notion access token is required"): + extractor.get_notion_last_edited_time() + + def test_get_access_token_success_and_errors(self, monkeypatch): + with pytest.raises(Exception, match="No credential id found"): + notion_extractor.NotionExtractor._get_access_token("tenant", None) + + class FakeProviderServiceMissing: + def get_datasource_credentials(self, **kwargs): + return None + + monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceMissing) + with pytest.raises(Exception, match="No notion credential found"): + notion_extractor.NotionExtractor._get_access_token("tenant", "cred") + + class FakeProviderServiceFound: + def get_datasource_credentials(self, **kwargs): + return {"integration_secret": "token-from-credential"} + + monkeypatch.setattr(notion_extractor, "DatasourceProviderService", FakeProviderServiceFound) + + assert notion_extractor.NotionExtractor._get_access_token("tenant", "cred") == "token-from-credential" diff --git a/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py new file mode 100644 index 0000000000..fb3c6e52c6 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/test_text_extractor.py @@ -0,0 +1,79 @@ +from pathlib import Path +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.text_extractor as text_module +from core.rag.extractor.text_extractor import TextExtractor + + +class TestTextExtractor: + def test_extract_success(self, tmp_path): + file_path = tmp_path / "data.txt" + file_path.write_text("hello world", encoding="utf-8") + + extractor = TextExtractor(str(file_path)) + docs = extractor.extract() + + assert len(docs) == 1 + assert docs[0].page_content == "hello world" + assert docs[0].metadata == {"source": str(file_path)} + + def test_extract_autodetect_success_after_decode_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=True) + + calls = [] + + def fake_read_text(self, encoding=None): + calls.append(encoding) + if encoding is None: + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + if encoding == "bad": + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + return "decoded text" + + monkeypatch.setattr(Path, "read_text", fake_read_text, raising=True) + monkeypatch.setattr( + text_module, + "detect_file_encodings", + lambda _: [SimpleNamespace(encoding="bad"), SimpleNamespace(encoding="utf-8")], + ) + + docs = extractor.extract() + + assert docs[0].page_content == "decoded text" + assert calls == [None, "bad", "utf-8"] + + def test_extract_autodetect_all_fail_raises_runtime_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=True) + + def always_decode_error(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + + monkeypatch.setattr(Path, "read_text", always_decode_error, raising=True) + monkeypatch.setattr(text_module, "detect_file_encodings", lambda _: [SimpleNamespace(encoding="latin-1")]) + + with pytest.raises(RuntimeError, match="all detected encodings failed"): + extractor.extract() + + def test_extract_decode_error_without_autodetect_raises_runtime_error(self, monkeypatch): + extractor = TextExtractor("dummy.txt", autodetect_encoding=False) + + def always_decode_error(self, encoding=None): + raise UnicodeDecodeError("utf-8", b"x", 0, 1, "decode") + + monkeypatch.setattr(Path, "read_text", always_decode_error, raising=True) + + with pytest.raises(RuntimeError, match="specified encoding failed"): + extractor.extract() + + def test_extract_wraps_non_decode_exceptions(self, monkeypatch): + extractor = TextExtractor("dummy.txt") + + def raise_other(self, encoding=None): + raise OSError("io error") + + monkeypatch.setattr(Path, "read_text", raise_other, raising=True) + + with pytest.raises(RuntimeError, match="Error loading dummy.txt"): + extractor.extract() diff --git a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py index 0792ada194..64eb89590a 100644 --- a/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py +++ b/api/tests/unit_tests/core/rag/extractor/test_word_extractor.py @@ -3,9 +3,12 @@ import io import os import tempfile +from collections import UserDict from pathlib import Path from types import SimpleNamespace +from unittest.mock import MagicMock +import pytest from docx import Document from docx.oxml import OxmlElement from docx.oxml.ns import qn @@ -136,7 +139,7 @@ def test_extract_images_from_docx(monkeypatch): monkeypatch.setattr(we, "UploadFile", FakeUploadFile) # Patch external image fetcher - def fake_get(url: str): + def fake_get(url: str, **kwargs): assert url == "https://example.com/image.png" return SimpleNamespace(status_code=200, headers={"Content-Type": "image/png"}, content=external_bytes) @@ -203,10 +206,8 @@ def test_extract_images_from_docx_uses_internal_files_url(): finally: # Restore original values - if original_files_url is not None: - dify_config.FILES_URL = original_files_url - if original_internal_files_url is not None: - dify_config.INTERNAL_FILES_URL = original_internal_files_url + dify_config.FILES_URL = original_files_url + dify_config.INTERNAL_FILES_URL = original_internal_files_url def test_extract_hyperlinks(monkeypatch): @@ -314,3 +315,405 @@ def test_extract_legacy_hyperlinks(monkeypatch): finally: if os.path.exists(tmp_path): os.remove(tmp_path) + + +def test_init_rejects_invalid_url_status(monkeypatch): + class FakeResponse: + status_code = 404 + content = b"" + closed = False + + def close(self): + self.closed = True + + fake_response = FakeResponse() + monkeypatch.setattr(we, "ssrf_proxy", SimpleNamespace(get=lambda url, **kwargs: fake_response)) + + with pytest.raises(ValueError, match="returned status code 404"): + WordExtractor("https://example.com/missing.docx", "tenant", "user") + + assert fake_response.closed is True + + +def test_init_expands_home_path_and_invalid_local_path(monkeypatch, tmp_path): + target_file = tmp_path / "expanded.docx" + target_file.write_bytes(b"docx") + + monkeypatch.setattr(we.os.path, "expanduser", lambda p: str(target_file)) + monkeypatch.setattr( + we.os.path, + "isfile", + lambda p: p == str(target_file), + ) + + extractor = WordExtractor("~/expanded.docx", "tenant", "user") + assert extractor.file_path == str(target_file) + + monkeypatch.setattr(we.os.path, "isfile", lambda p: False) + with pytest.raises(ValueError, match="is not a valid file or url"): + WordExtractor("not-a-file", "tenant", "user") + + +def test_del_closes_temp_file(): + extractor = object.__new__(WordExtractor) + extractor.temp_file = MagicMock() + + WordExtractor.__del__(extractor) + + extractor.temp_file.close.assert_called_once() + + +def test_extract_images_handles_invalid_external_cases(monkeypatch): + class FakeTargetRef: + def __contains__(self, item): + return item == "image" + + def split(self, sep): + return [None] + + rel_invalid_url = SimpleNamespace(is_external=True, target_ref="image-no-url") + rel_request_error = SimpleNamespace(is_external=True, target_ref="https://example.com/image-error") + rel_unknown_mime = SimpleNamespace(is_external=True, target_ref="https://example.com/image-unknown") + rel_internal_none_ext = SimpleNamespace(is_external=False, target_ref=FakeTargetRef(), target_part=object()) + + doc = SimpleNamespace( + part=SimpleNamespace( + rels={ + "r1": rel_invalid_url, + "r2": rel_request_error, + "r3": rel_unknown_mime, + "r4": rel_internal_none_ext, + } + ) + ) + + def fake_get(url, **kwargs): + if "image-error" in url: + raise RuntimeError("network") + return SimpleNamespace(status_code=200, headers={"Content-Type": "application/unknown"}, content=b"x") + + monkeypatch.setattr(we, "ssrf_proxy", SimpleNamespace(get=fake_get)) + db_stub = SimpleNamespace(session=SimpleNamespace(add=lambda obj: None, commit=MagicMock())) + monkeypatch.setattr(we, "db", db_stub) + monkeypatch.setattr(we, "storage", SimpleNamespace(save=lambda key, data: None)) + monkeypatch.setattr(we.dify_config, "FILES_URL", "http://files.local", raising=False) + + extractor = object.__new__(WordExtractor) + extractor.tenant_id = "tenant" + extractor.user_id = "user" + + result = extractor._extract_images_from_docx(doc) + + assert result == {} + db_stub.session.commit.assert_called_once() + + +def test_table_to_markdown_and_parse_helpers(monkeypatch): + extractor = object.__new__(WordExtractor) + + table = SimpleNamespace( + rows=[ + SimpleNamespace(cells=[1, 2]), + SimpleNamespace(cells=[3, 4]), + ] + ) + parse_row_mock = MagicMock(side_effect=[["H1", "H2"], ["A", "B"]]) + monkeypatch.setattr(extractor, "_parse_row", parse_row_mock) + + markdown = extractor._table_to_markdown(table, {}) + assert markdown == "| H1 | H2 |\n| --- | --- |\n| A | B |" + + class FakeBlip: + def __init__(self, image_id): + self.image_id = image_id + + def get(self, key): + return self.image_id + + class FakeRunChild: + def __init__(self, blips, text=""): + self._blips = blips + self.text = text + self.tag = qn("w:r") + + def xpath(self, pattern): + if pattern == ".//a:blip": + return self._blips + return [] + + class FakeRun: + def __init__(self, element, paragraph): + # Mirror the subset used by _parse_cell_paragraph + self.element = element + self.text = getattr(element, "text", "") + + # Patch we.Run so our lightweight child objects work with the extractor + monkeypatch.setattr(we, "Run", FakeRun) + + image_part = object() + paragraph = SimpleNamespace( + _element=[ + FakeRunChild([FakeBlip(None), FakeBlip("ext"), FakeBlip("int")], text=""), + FakeRunChild([], text="plain"), + ], + part=SimpleNamespace( + rels={ + "ext": SimpleNamespace(is_external=True), + "int": SimpleNamespace(is_external=False, target_part=image_part), + } + ), + ) + + image_map = {"ext": "EXT-IMG", image_part: "INT-IMG"} + assert extractor._parse_cell_paragraph(paragraph, image_map) == "EXT-IMGINT-IMGplain" + + cell = SimpleNamespace(paragraphs=[paragraph, paragraph]) + assert extractor._parse_cell(cell, image_map) == "EXT-IMGINT-IMGplain" + + +def test_parse_docx_covers_drawing_shapes_hyperlink_error_and_table_branch(monkeypatch): + extractor = object.__new__(WordExtractor) + + ext_image_id = "ext-image" + int_embed_id = "int-embed" + shape_ext_id = "shape-ext" + shape_int_id = "shape-int" + + internal_part = object() + shape_internal_part = object() + + class Rels(UserDict): + def get(self, key, default=None): + if key == "link-bad": + raise RuntimeError("cannot resolve relation") + return super().get(key, default) + + rels = Rels( + { + ext_image_id: SimpleNamespace(is_external=True, target_ref="https://img/ext.png"), + int_embed_id: SimpleNamespace(is_external=False, target_part=internal_part), + shape_ext_id: SimpleNamespace(is_external=True, target_ref="https://img/shape.png"), + shape_int_id: SimpleNamespace(is_external=False, target_part=shape_internal_part), + "link-ok": SimpleNamespace(is_external=True, target_ref="https://example.com"), + } + ) + + image_map = { + ext_image_id: "[EXT]", + internal_part: "[INT]", + shape_ext_id: "[SHAPE_EXT]", + shape_internal_part: "[SHAPE_INT]", + } + + class FakeBlip: + def __init__(self, embed_id): + self.embed_id = embed_id + + def get(self, key): + return self.embed_id + + class FakeDrawing: + def __init__(self, embed_ids): + self.embed_ids = embed_ids + + def findall(self, pattern): + return [FakeBlip(embed_id) for embed_id in self.embed_ids] + + class FakeNode: + def __init__(self, text=None, attrs=None): + self.text = text + self._attrs = attrs or {} + + def get(self, key): + return self._attrs.get(key) + + class FakeShape: + def __init__(self, bin_id=None, img_id=None): + self.bin_id = bin_id + self.img_id = img_id + + def find(self, pattern): + if "binData" in pattern and self.bin_id: + return FakeNode( + text="shape", + attrs={"{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id": self.bin_id}, + ) + if "imagedata" in pattern and self.img_id: + return FakeNode(attrs={"id": self.img_id}) + return None + + class FakeChild: + def __init__( + self, + tag, + text="", + fld_chars=None, + instr_texts=None, + drawings=None, + shapes=None, + attrs=None, + hyperlink_runs=None, + ): + self.tag = tag + self.text = text + self._fld_chars = fld_chars or [] + self._instr_texts = instr_texts or [] + self._drawings = drawings or [] + self._shapes = shapes or [] + self._attrs = attrs or {} + self._hyperlink_runs = hyperlink_runs or [] + + def findall(self, pattern): + if pattern == qn("w:fldChar"): + return self._fld_chars + if pattern == qn("w:instrText"): + return self._instr_texts + if pattern == qn("w:r"): + return self._hyperlink_runs + if pattern.endswith("}drawing"): + return self._drawings + if pattern.endswith("}pict"): + return self._shapes + return [] + + def get(self, key): + return self._attrs.get(key) + + class FakeRun: + def __init__(self, element, paragraph): + self.element = element + self.text = getattr(element, "text", "") + + paragraph_main = SimpleNamespace( + _element=[ + FakeChild( + qn("w:r"), + text="run-text", + drawings=[FakeDrawing([ext_image_id, int_embed_id])], + shapes=[FakeShape(bin_id=shape_ext_id, img_id=shape_int_id)], + ), + FakeChild( + qn("w:r"), + text="", + drawings=[], + shapes=[FakeShape(bin_id=shape_ext_id)], + ), + FakeChild( + qn("w:hyperlink"), + attrs={qn("r:id"): "link-ok"}, + hyperlink_runs=[FakeChild(qn("w:r"), text="LinkText")], + ), + FakeChild( + qn("w:hyperlink"), + attrs={qn("r:id"): "link-bad"}, + hyperlink_runs=[FakeChild(qn("w:r"), text="BrokenLink")], + ), + ] + ) + paragraph_empty = SimpleNamespace(_element=[FakeChild(qn("w:r"), text=" ")]) + + fake_doc = SimpleNamespace( + part=SimpleNamespace(rels=rels, related_parts={int_embed_id: internal_part}), + paragraphs=[paragraph_main, paragraph_empty], + tables=[SimpleNamespace(rows=[])], + element=SimpleNamespace( + body=[SimpleNamespace(tag="w:p"), SimpleNamespace(tag="w:p"), SimpleNamespace(tag="w:tbl")] + ), + ) + + monkeypatch.setattr(we, "DocxDocument", lambda _: fake_doc) + monkeypatch.setattr(we, "Run", FakeRun) + monkeypatch.setattr(extractor, "_extract_images_from_docx", lambda doc: image_map) + monkeypatch.setattr(extractor, "_table_to_markdown", lambda table, image_map: "TABLE-MARKDOWN") + logger_exception = MagicMock() + monkeypatch.setattr(we.logger, "exception", logger_exception) + + content = extractor.parse_docx("dummy.docx") + + assert "[EXT]" in content + assert "[INT]" in content + assert "[SHAPE_EXT]" in content + assert "[LinkText](https://example.com)" in content + assert "BrokenLink" in content + assert "TABLE-MARKDOWN" in content + logger_exception.assert_called_once() + + +def test_parse_cell_paragraph_hyperlink_in_table_cell_http(): + doc = Document() + table = doc.add_table(rows=1, cols=1) + cell = table.cell(0, 0) + p = cell.paragraphs[0] + + # Build modern hyperlink inside table cell + r_id = "rIdHttp1" + hyperlink = OxmlElement("w:hyperlink") + hyperlink.set(qn("r:id"), r_id) + + run_elem = OxmlElement("w:r") + t = OxmlElement("w:t") + t.text = "Dify" + run_elem.append(t) + hyperlink.append(run_elem) + p._p.append(hyperlink) + + # Relationship for external http link + doc.part.rels.add_relationship( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", + "https://dify.ai", + r_id, + is_external=True, + ) + + with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp: + doc.save(tmp.name) + tmp_path = tmp.name + + try: + reopened = Document(tmp_path) + para = reopened.tables[0].cell(0, 0).paragraphs[0] + extractor = object.__new__(WordExtractor) + out = extractor._parse_cell_paragraph(para, {}) + assert out == "[Dify](https://dify.ai)" + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) + + +def test_parse_cell_paragraph_hyperlink_in_table_cell_mailto(): + doc = Document() + table = doc.add_table(rows=1, cols=1) + cell = table.cell(0, 0) + p = cell.paragraphs[0] + + r_id = "rIdMail1" + hyperlink = OxmlElement("w:hyperlink") + hyperlink.set(qn("r:id"), r_id) + + run_elem = OxmlElement("w:r") + t = OxmlElement("w:t") + t.text = "john@test.com" + run_elem.append(t) + hyperlink.append(run_elem) + p._p.append(hyperlink) + + doc.part.rels.add_relationship( + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink", + "mailto:john@test.com", + r_id, + is_external=True, + ) + + with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp: + doc.save(tmp.name) + tmp_path = tmp.name + + try: + reopened = Document(tmp_path) + para = reopened.tables[0].cell(0, 0).paragraphs[0] + extractor = object.__new__(WordExtractor) + out = extractor._parse_cell_paragraph(para, {}) + assert out == "[john@test.com](mailto:john@test.com)" + finally: + if os.path.exists(tmp_path): + os.remove(tmp_path) diff --git a/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py b/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py new file mode 100644 index 0000000000..26ce333e11 --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/unstructured/test_unstructured_extractors.py @@ -0,0 +1,300 @@ +"""Unit tests for unstructured extractors and their local/API partitioning paths.""" + +import base64 +import sys +import types +from types import SimpleNamespace + +import pytest + +import core.rag.extractor.unstructured.unstructured_epub_extractor as epub_module +from core.rag.extractor.unstructured.unstructured_doc_extractor import UnstructuredWordExtractor +from core.rag.extractor.unstructured.unstructured_eml_extractor import UnstructuredEmailExtractor +from core.rag.extractor.unstructured.unstructured_epub_extractor import UnstructuredEpubExtractor +from core.rag.extractor.unstructured.unstructured_markdown_extractor import UnstructuredMarkdownExtractor +from core.rag.extractor.unstructured.unstructured_msg_extractor import UnstructuredMsgExtractor +from core.rag.extractor.unstructured.unstructured_ppt_extractor import UnstructuredPPTExtractor +from core.rag.extractor.unstructured.unstructured_pptx_extractor import UnstructuredPPTXExtractor +from core.rag.extractor.unstructured.unstructured_xml_extractor import UnstructuredXmlExtractor + + +def _register_module(monkeypatch: pytest.MonkeyPatch, name: str, **attrs: object) -> types.ModuleType: + module = types.ModuleType(name) + for k, v in attrs.items(): + setattr(module, k, v) + monkeypatch.setitem(sys.modules, name, module) + return module + + +def _register_unstructured_packages(monkeypatch: pytest.MonkeyPatch) -> None: + _register_module(monkeypatch, "unstructured", __path__=[]) + _register_module(monkeypatch, "unstructured.partition", __path__=[]) + _register_module(monkeypatch, "unstructured.chunking", __path__=[]) + _register_module(monkeypatch, "unstructured.file_utils", __path__=[]) + + +def _install_chunk_by_title(monkeypatch: pytest.MonkeyPatch, chunks: list[SimpleNamespace]) -> None: + _register_unstructured_packages(monkeypatch) + + def chunk_by_title( + elements: list[SimpleNamespace], max_characters: int, combine_text_under_n_chars: int + ) -> list[SimpleNamespace]: + return chunks + + _register_module(monkeypatch, "unstructured.chunking.title", chunk_by_title=chunk_by_title) + + +class TestUnstructuredMarkdownMsgXml: + def test_markdown_extractor_without_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text=" chunk-1 "), SimpleNamespace(text=" chunk-2 ")]) + _register_module( + monkeypatch, "unstructured.partition.md", partition_md=lambda filename: [SimpleNamespace(text="x")] + ) + + docs = UnstructuredMarkdownExtractor("/tmp/file.md").extract() + + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + def test_markdown_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text=" via-api ")]) + calls = {} + + def partition_via_api(filename, api_url, api_key): + calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="ignored")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + docs = UnstructuredMarkdownExtractor("/tmp/file.md", api_url="https://u", api_key="k").extract() + + assert docs[0].page_content == "via-api" + assert calls == {"filename": "/tmp/file.md", "api_url": "https://u", "api_key": "k"} + + def test_msg_extractor_local(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="msg-doc")]) + _register_module( + monkeypatch, "unstructured.partition.msg", partition_msg=lambda filename: [SimpleNamespace(text="x")] + ) + + assert UnstructuredMsgExtractor("/tmp/file.msg").extract()[0].page_content == "msg-doc" + + def test_msg_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="msg-doc")]) + calls = {} + + def partition_via_api(filename, api_url, api_key): + calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + assert ( + UnstructuredMsgExtractor("/tmp/file.msg", api_url="https://u", api_key="k").extract()[0].page_content + == "msg-doc" + ) + assert calls["filename"] == "/tmp/file.msg" + + def test_xml_extractor_local_and_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="xml-doc")]) + + xml_calls = {} + + def partition_xml(filename, xml_keep_tags): + xml_calls.update({"filename": filename, "xml_keep_tags": xml_keep_tags}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.xml", partition_xml=partition_xml) + + assert UnstructuredXmlExtractor("/tmp/file.xml").extract()[0].page_content == "xml-doc" + assert xml_calls == {"filename": "/tmp/file.xml", "xml_keep_tags": True} + + api_calls = {} + + def partition_via_api(filename, api_url, api_key): + api_calls.update({"filename": filename, "api_url": api_url, "api_key": api_key}) + return [SimpleNamespace(text="x")] + + _register_module(monkeypatch, "unstructured.partition.api", partition_via_api=partition_via_api) + + assert ( + UnstructuredXmlExtractor("/tmp/file.xml", api_url="https://u", api_key="k").extract()[0].page_content + == "xml-doc" + ) + assert api_calls["filename"] == "/tmp/file.xml" + + +class TestUnstructuredEmailAndEpub: + def test_email_extractor_local_decodes_html_and_suppresses_decode_errors(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + captured = {} + + def chunk_by_title( + elements: list[SimpleNamespace], max_characters: int, combine_text_under_n_chars: int + ) -> list[SimpleNamespace]: + captured["elements"] = list(elements) + return [SimpleNamespace(text=" chunked-email ")] + + _register_module(monkeypatch, "unstructured.chunking.title", chunk_by_title=chunk_by_title) + + html = "Hello Email
" + encoded_html = base64.b64encode(html.encode("utf-8")).decode("utf-8") + bad_base64 = "not-base64" + + elements = [SimpleNamespace(text=encoded_html), SimpleNamespace(text=bad_base64)] + _register_module(monkeypatch, "unstructured.partition.email", partition_email=lambda filename: elements) + + docs = UnstructuredEmailExtractor("/tmp/file.eml").extract() + + assert docs[0].page_content == "chunked-email" + chunk_elements = captured["elements"] + assert "Hello Email" in chunk_elements[0].text + assert chunk_elements[1].text == bad_base64 + + def test_email_extractor_with_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="api-email")]) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="abc")], + ) + + docs = UnstructuredEmailExtractor("/tmp/file.eml", api_url="https://u", api_key="k").extract() + + assert docs[0].page_content == "api-email" + + def test_epub_extractor_local_and_api(self, monkeypatch): + _install_chunk_by_title(monkeypatch, [SimpleNamespace(text="epub-doc")]) + + calls = {"download": 0, "partition": 0} + + def fake_download_pandoc(): + calls["download"] += 1 + + def partition_epub(filename, xml_keep_tags): + calls["partition"] += 1 + assert xml_keep_tags is True + return [SimpleNamespace(text="x")] + + monkeypatch.setattr(epub_module.pypandoc, "download_pandoc", fake_download_pandoc) + _register_module(monkeypatch, "unstructured.partition.epub", partition_epub=partition_epub) + + docs = UnstructuredEpubExtractor("/tmp/file.epub").extract() + + assert docs[0].page_content == "epub-doc" + assert calls == {"download": 1, "partition": 1} + + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="x")], + ) + + docs = UnstructuredEpubExtractor("/tmp/file.epub", api_url="https://u", api_key="k").extract() + assert docs[0].page_content == "epub-doc" + + +class TestUnstructuredPPTAndPPTX: + def test_ppt_extractor_requires_api_url(self): + with pytest.raises(NotImplementedError, match="Unstructured API Url is not configured"): + UnstructuredPPTExtractor("/tmp/file.ppt").extract() + + def test_ppt_extractor_groups_text_by_page(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [ + SimpleNamespace(text="A", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="B", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="skip", metadata=SimpleNamespace(page_number=None)), + SimpleNamespace(text="C", metadata=SimpleNamespace(page_number=2)), + ], + ) + + docs = UnstructuredPPTExtractor("/tmp/file.ppt", api_url="https://u", api_key="k").extract() + + assert [doc.page_content for doc in docs] == ["A\nB", "C"] + + def test_pptx_extractor_local_and_api(self, monkeypatch): + _register_unstructured_packages(monkeypatch) + _register_module( + monkeypatch, + "unstructured.partition.pptx", + partition_pptx=lambda filename: [ + SimpleNamespace(text="P1", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="P2", metadata=SimpleNamespace(page_number=2)), + SimpleNamespace(text="Skip", metadata=SimpleNamespace(page_number=None)), + ], + ) + + docs = UnstructuredPPTXExtractor("/tmp/file.pptx").extract() + assert [doc.page_content for doc in docs] == ["P1", "P2"] + + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [ + SimpleNamespace(text="X", metadata=SimpleNamespace(page_number=1)), + SimpleNamespace(text="Y", metadata=SimpleNamespace(page_number=1)), + ], + ) + + docs = UnstructuredPPTXExtractor("/tmp/file.pptx", api_url="https://u", api_key="k").extract() + assert [doc.page_content for doc in docs] == ["X\nY"] + + +class TestUnstructuredWord: + def _install_doc_modules(self, monkeypatch, version: str, filetype_value): + _register_unstructured_packages(monkeypatch) + + class FileType: + DOC = "doc" + + _register_module(monkeypatch, "unstructured.__version__", __version__=version) + _register_module( + monkeypatch, + "unstructured.file_utils.filetype", + FileType=FileType, + detect_filetype=lambda filename: filetype_value, + ) + _register_module( + monkeypatch, + "unstructured.partition.api", + partition_via_api=lambda filename, api_url, api_key: [SimpleNamespace(text="api-doc")], + ) + _register_module( + monkeypatch, + "unstructured.partition.docx", + partition_docx=lambda filename: [SimpleNamespace(text="docx-doc")], + ) + _register_module( + monkeypatch, + "unstructured.chunking.title", + chunk_by_title=lambda elements, max_characters, combine_text_under_n_chars: [ + SimpleNamespace(text="chunk-1"), + SimpleNamespace(text="chunk-2"), + ], + ) + + def test_word_extractor_rejects_doc_on_old_unstructured_version(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.10", filetype_value="doc") + + with pytest.raises(ValueError, match="Partitioning .doc files is only supported"): + UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() + + def test_word_extractor_doc_and_docx_paths(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.11", filetype_value="doc") + + docs = UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + self._install_doc_modules(monkeypatch, version="0.5.0", filetype_value="not-doc") + docs = UnstructuredWordExtractor("/tmp/file.docx", "https://u", "k").extract() + assert [doc.page_content for doc in docs] == ["chunk-1", "chunk-2"] + + def test_word_extractor_magic_import_error_fallback_to_extension(self, monkeypatch): + self._install_doc_modules(monkeypatch, version="0.4.10", filetype_value="not-used") + monkeypatch.setitem(sys.modules, "magic", None) + + with pytest.raises(ValueError, match="Partitioning .doc files is only supported"): + UnstructuredWordExtractor("/tmp/file.doc", "https://u", "k").extract() diff --git a/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py new file mode 100644 index 0000000000..d758be218a --- /dev/null +++ b/api/tests/unit_tests/core/rag/extractor/watercrawl/test_watercrawl.py @@ -0,0 +1,434 @@ +"""Unit tests for WaterCrawl client, provider, and extractor behavior.""" + +import json +from typing import Any +from unittest.mock import MagicMock + +import pytest + +import core.rag.extractor.watercrawl.client as client_module +from core.rag.extractor.watercrawl.client import BaseAPIClient, WaterCrawlAPIClient +from core.rag.extractor.watercrawl.exceptions import ( + WaterCrawlAuthenticationError, + WaterCrawlBadRequestError, + WaterCrawlPermissionError, +) +from core.rag.extractor.watercrawl.extractor import WaterCrawlWebExtractor +from core.rag.extractor.watercrawl.provider import WaterCrawlProvider + + +def _response( + status_code: int, + json_data: dict[str, Any] | None = None, + content_type: str = "application/json", + content: bytes = b"", + text: str = "", +) -> MagicMock: + response = MagicMock() + response.status_code = status_code + response.headers = {"Content-Type": content_type} + response.content = content + response.text = text + response.json.return_value = json_data if json_data is not None else {} + response.raise_for_status.return_value = None + response.close.return_value = None + return response + + +class TestWaterCrawlExceptions: + def test_bad_request_error_properties_and_string(self): + response = _response(400, {"message": "bad request", "errors": {"url": ["invalid"]}}) + + err = WaterCrawlBadRequestError(response) + parsed_errors = json.loads(err.flat_errors) + + assert err.status_code == 400 + assert err.message == "bad request" + assert "url" in parsed_errors + assert any("invalid" in str(item) for item in parsed_errors["url"]) + assert "WaterCrawlBadRequestError" in str(err) + + def test_permission_and_authentication_error_strings(self): + response = _response(403, {"message": "quota exceeded", "errors": {}}) + + permission = WaterCrawlPermissionError(response) + authentication = WaterCrawlAuthenticationError(response) + + assert "exceeding your WaterCrawl API limits" in str(permission) + assert "API key is invalid or expired" in str(authentication) + + +class TestBaseAPIClient: + def test_init_session_builds_expected_headers(self, monkeypatch): + captured = {} + + def fake_client(**kwargs): + captured.update(kwargs) + return "session" + + monkeypatch.setattr(client_module.httpx, "Client", fake_client) + + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + assert client.session == "session" + assert captured["headers"]["X-API-Key"] == "k" + assert captured["headers"]["User-Agent"] == "WaterCrawl-Plugin" + + def test_request_stream_and_non_stream_paths(self, monkeypatch): + class FakeSession: + def __init__(self): + self.request_calls = [] + self.build_calls = [] + self.send_calls = [] + + def request(self, method, url, params=None, json=None, **kwargs): + self.request_calls.append((method, url, params, json, kwargs)) + return "non-stream-response" + + def build_request(self, method, url, params=None, json=None): + req = (method, url, params, json) + self.build_calls.append(req) + return req + + def send(self, request, stream=False, **kwargs): + self.send_calls.append((request, stream, kwargs)) + return "stream-response" + + fake_session = FakeSession() + monkeypatch.setattr(BaseAPIClient, "init_session", lambda self: fake_session) + + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + assert client._request("GET", "/v1/items", query_params={"a": 1}) == "non-stream-response" + assert fake_session.request_calls[0][1] == "https://watercrawl.dev/v1/items" + + assert client._request("GET", "/v1/items", stream=True) == "stream-response" + assert fake_session.build_calls + assert fake_session.send_calls[0][1] is True + + def test_http_method_helpers_delegate_to_request(self, monkeypatch): + monkeypatch.setattr(BaseAPIClient, "init_session", lambda self: MagicMock()) + client = BaseAPIClient(api_key="k", base_url="https://watercrawl.dev") + + calls = [] + + def fake_request(method, endpoint, query_params=None, data=None, **kwargs): + calls.append((method, endpoint, query_params, data)) + return "ok" + + monkeypatch.setattr(client, "_request", fake_request) + + assert client._get("/a") == "ok" + assert client._post("/b", data={"x": 1}) == "ok" + assert client._put("/c", data={"x": 2}) == "ok" + assert client._delete("/d") == "ok" + assert client._patch("/e", data={"x": 3}) == "ok" + assert [c[0] for c in calls] == ["GET", "POST", "PUT", "DELETE", "PATCH"] + + +class TestWaterCrawlAPIClient: + def test_process_eventstream_and_download(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + response = MagicMock() + response.iter_lines.return_value = [ + b"event: keep-alive", + b'data: {"type":"result","data":{"result":"http://x"}}', + b'data: {"type":"log","data":{"msg":"ok"}}', + ] + + monkeypatch.setattr(client, "download_result", lambda data: {"result": {"markdown": "body"}, "url": "u"}) + + events = list(client.process_eventstream(response, download=True)) + + assert events[0]["data"]["result"]["markdown"] == "body" + assert events[1]["type"] == "log" + response.close.assert_called_once() + + @pytest.mark.parametrize( + ("status", "expected_exception"), + [ + (401, WaterCrawlAuthenticationError), + (403, WaterCrawlPermissionError), + (422, WaterCrawlBadRequestError), + ], + ) + def test_process_response_error_statuses(self, status: int, expected_exception: type[Exception]): + client = WaterCrawlAPIClient(api_key="k") + + with pytest.raises(expected_exception): + client.process_response(_response(status, {"message": "bad", "errors": {"url": ["x"]}})) + + def test_process_response_204_returns_none(self): + client = WaterCrawlAPIClient(api_key="k") + assert client.process_response(_response(204, None)) is None + + def test_process_response_json_payloads(self): + client = WaterCrawlAPIClient(api_key="k") + assert client.process_response(_response(200, {"ok": True})) == {"ok": True} + assert client.process_response(_response(200, None)) == {} + + def test_process_response_octet_stream_returns_bytes(self): + client = WaterCrawlAPIClient(api_key="k") + assert ( + client.process_response(_response(200, content_type="application/octet-stream", content=b"bin")) == b"bin" + ) + + def test_process_response_event_stream_returns_generator(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + generator = (item for item in [{"type": "result", "data": {}}]) + monkeypatch.setattr(client, "process_eventstream", lambda response, download=False: generator) + assert client.process_response(_response(200, content_type="text/event-stream")) is generator + + def test_process_response_unknown_content_type_raises(self): + client = WaterCrawlAPIClient(api_key="k") + with pytest.raises(Exception, match="Unknown response type"): + client.process_response(_response(200, content_type="text/plain", text="x")) + + def test_process_response_uses_raise_for_status(self): + client = WaterCrawlAPIClient(api_key="k") + response = _response(500, {"message": "server"}) + response.raise_for_status.side_effect = RuntimeError("http error") + + with pytest.raises(RuntimeError, match="http error"): + client.process_response(response) + + def test_endpoint_wrappers(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + monkeypatch.setattr(client, "process_response", lambda resp: "processed") + monkeypatch.setattr(client, "_get", lambda *args, **kwargs: "get-resp") + monkeypatch.setattr(client, "_post", lambda *args, **kwargs: "post-resp") + monkeypatch.setattr(client, "_delete", lambda *args, **kwargs: "delete-resp") + + assert client.get_crawl_requests_list() == "processed" + assert client.get_crawl_request("id") == "processed" + assert client.create_crawl_request(url="https://x") == "processed" + assert client.stop_crawl_request("id") == "processed" + assert client.download_crawl_request("id") == "processed" + assert client.get_crawl_request_results("id") == "processed" + + def test_monitor_crawl_request_generator_and_validation(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + monkeypatch.setattr(client, "process_response", lambda _: (x for x in [{"type": "result", "data": 1}])) + monkeypatch.setattr(client, "_get", lambda *args, **kwargs: "stream-resp") + + events = list(client.monitor_crawl_request("job-1", prefetched=True)) + assert events == [{"type": "result", "data": 1}] + + monkeypatch.setattr(client, "process_response", lambda _: [{"type": "result"}]) + with pytest.raises(ValueError, match="Generator expected"): + list(client.monitor_crawl_request("job-1")) + + def test_scrape_url_sync_and_async(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + monkeypatch.setattr(client, "create_crawl_request", lambda **kwargs: {"uuid": "job-1"}) + + async_result = client.scrape_url("https://example.com", sync=False) + assert async_result == {"uuid": "job-1"} + + monkeypatch.setattr( + client, + "monitor_crawl_request", + lambda item_id, prefetched: iter( + [{"type": "log", "data": {}}, {"type": "result", "data": {"url": "https://example.com"}}] + ), + ) + sync_result = client.scrape_url("https://example.com", sync=True) + assert sync_result == {"url": "https://example.com"} + + def test_download_result_fetches_json_and_closes(self, monkeypatch): + client = WaterCrawlAPIClient(api_key="k") + + response = _response(200, {"markdown": "body"}) + monkeypatch.setattr(client_module.httpx, "get", lambda *args, **kwargs: response) + + result = client.download_result({"result": "https://example.com/result.json"}) + + assert result["result"] == {"markdown": "body"} + response.close.assert_called_once() + + +class TestWaterCrawlProvider: + def test_crawl_url_builds_options_and_min_wait_time(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + captured_kwargs = {} + + def create_crawl_request_spy(**kwargs): + captured_kwargs.update(kwargs) + return {"uuid": "job-1"} + + monkeypatch.setattr(provider.client, "create_crawl_request", create_crawl_request_spy) + + result = provider.crawl_url( + "https://example.com", + { + "crawl_sub_pages": True, + "limit": 5, + "max_depth": 2, + "includes": "a,b", + "excludes": "x,y", + "exclude_tags": "nav,footer", + "include_tags": "main", + "wait_time": 100, + "only_main_content": False, + }, + ) + + assert result == {"status": "active", "job_id": "job-1"} + assert captured_kwargs["url"] == "https://example.com" + assert captured_kwargs["spider_options"] == { + "max_depth": 2, + "page_limit": 5, + "allowed_domains": [], + "exclude_paths": ["x", "y"], + "include_paths": ["a", "b"], + } + assert captured_kwargs["page_options"]["exclude_tags"] == ["nav", "footer"] + assert captured_kwargs["page_options"]["include_tags"] == ["main"] + assert captured_kwargs["page_options"]["only_main_content"] is False + assert captured_kwargs["page_options"]["wait_time"] == 1000 + + def test_get_crawl_status_active_and_completed(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + monkeypatch.setattr( + provider.client, + "get_crawl_request", + lambda job_id: { + "status": "running", + "uuid": job_id, + "options": {"spider_options": {"page_limit": 3}}, + "number_of_documents": 1, + "duration": "00:00:01.500000", + }, + ) + + active = provider.get_crawl_status("job-1") + assert active["status"] == "active" + assert active["data"] == [] + assert active["time_consuming"] == pytest.approx(1.5) + + monkeypatch.setattr( + provider.client, + "get_crawl_request", + lambda job_id: { + "status": "completed", + "uuid": job_id, + "options": {"spider_options": {"page_limit": 2}}, + "number_of_documents": 2, + "duration": "00:00:02.000000", + }, + ) + monkeypatch.setattr(provider, "_get_results", lambda crawl_request_id, query_params=None: iter([{"url": "u"}])) + + completed = provider.get_crawl_status("job-2") + assert completed["status"] == "completed" + assert completed["data"] == [{"url": "u"}] + + def test_get_crawl_url_data_and_scrape(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + monkeypatch.setattr(provider, "scrape_url", lambda url: {"source_url": url}) + assert provider.get_crawl_url_data("", "https://example.com") == {"source_url": "https://example.com"} + + monkeypatch.setattr(provider, "_get_results", lambda job_id, query_params=None: iter([{"source_url": "u1"}])) + assert provider.get_crawl_url_data("job", "u1") == {"source_url": "u1"} + + monkeypatch.setattr(provider, "_get_results", lambda job_id, query_params=None: iter([])) + assert provider.get_crawl_url_data("job", "u1") is None + + def test_structure_data_validation_and_get_results_pagination(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + + with pytest.raises(ValueError, match="Invalid result object"): + provider._structure_data({"result": "not-a-dict"}) + + structured = provider._structure_data( + { + "url": "https://example.com", + "result": { + "metadata": {"title": "Title", "description": "Desc"}, + "markdown": "Body", + }, + } + ) + assert structured["title"] == "Title" + assert structured["markdown"] == "Body" + + responses = [ + { + "results": [ + { + "url": "https://a", + "result": {"metadata": {"title": "A", "description": "DA"}, "markdown": "MA"}, + } + ], + "next": "next-page", + }, + {"results": [], "next": None}, + ] + + monkeypatch.setattr( + provider.client, + "get_crawl_request_results", + lambda crawl_request_id, page, page_size, query_params: responses.pop(0), + ) + + results = list(provider._get_results("job-1")) + assert len(results) == 1 + assert results[0]["source_url"] == "https://a" + + def test_scrape_url_uses_client_and_structure(self, monkeypatch): + provider = WaterCrawlProvider(api_key="k") + monkeypatch.setattr( + provider.client, "scrape_url", lambda **kwargs: {"result": {"metadata": {}, "markdown": "m"}, "url": "u"} + ) + + result = provider.scrape_url("u") + + assert result["source_url"] == "u" + + +class TestWaterCrawlWebExtractor: + def test_extract_crawl_and_scrape_modes(self, monkeypatch): + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_crawl_url_data", + lambda job_id, provider, url, tenant_id: { + "markdown": "crawl", + "source_url": url, + "description": "d", + "title": "t", + }, + ) + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_scrape_url_data", + lambda provider, url, tenant_id, only_main_content: { + "markdown": "scrape", + "source_url": url, + "description": "d", + "title": "t", + }, + ) + + crawl_extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + scrape_extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="scrape") + + assert crawl_extractor.extract()[0].page_content == "crawl" + assert scrape_extractor.extract()[0].page_content == "scrape" + + def test_extract_crawl_returns_empty_when_service_returns_none(self, monkeypatch): + monkeypatch.setattr( + "core.rag.extractor.watercrawl.extractor.WebsiteService.get_crawl_url_data", + lambda job_id, provider, url, tenant_id: None, + ) + + extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="crawl") + + assert extractor.extract() == [] + + def test_extract_unknown_mode_returns_empty(self): + extractor = WaterCrawlWebExtractor("https://example.com", "job-1", "tenant-1", mode="other") + + assert extractor.extract() == [] diff --git a/api/tests/unit_tests/core/rag/indexing/processor/conftest.py b/api/tests/unit_tests/core/rag/indexing/processor/conftest.py new file mode 100644 index 0000000000..2a3860e107 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/conftest.py @@ -0,0 +1,33 @@ +from contextlib import AbstractContextManager, nullcontext +from typing import Any + +import pytest + + +class _FakeFlaskApp: + def app_context(self) -> AbstractContextManager[None]: + return nullcontext() + + +class _FakeExecutor: + def __init__(self, future: Any) -> None: + self._future = future + + def __enter__(self) -> "_FakeExecutor": + return self + + def __exit__(self, exc_type: object, exc_value: object, traceback: object) -> bool: + return False + + def submit(self, func: object, preview: object) -> Any: + return self._future + + +@pytest.fixture +def fake_flask_app() -> _FakeFlaskApp: + return _FakeFlaskApp() + + +@pytest.fixture +def fake_executor_cls() -> type[_FakeExecutor]: + return _FakeExecutor diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py new file mode 100644 index 0000000000..e6cc582398 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_paragraph_index_processor.py @@ -0,0 +1,630 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor +from core.rag.models.document import AttachmentDocument, Document +from dify_graph.model_runtime.entities.llm_entities import LLMResult, LLMUsage +from dify_graph.model_runtime.entities.message_entities import AssistantPromptMessage, ImagePromptMessageContent +from dify_graph.model_runtime.entities.model_entities import ModelFeature + + +class TestParagraphIndexProcessor: + @pytest.fixture + def processor(self) -> ParagraphIndexProcessor: + return ParagraphIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + return document + + @pytest.fixture + def process_rule(self) -> dict: + return { + "mode": "custom", + "rules": {"segmentation": {"max_tokens": 256, "chunk_overlap": 10, "separator": "\n"}}, + } + + def _rules(self) -> SimpleNamespace: + segmentation = SimpleNamespace(max_tokens=256, chunk_overlap=10, separator="\n") + return SimpleNamespace(segmentation=segmentation) + + def _llm_result(self, content: str = "summary") -> LLMResult: + return LLMResult( + model="llm-model", + message=AssistantPromptMessage(content=content), + usage=LLMUsage.empty_usage(), + ) + + def test_extract_forwards_automatic_flag(self, processor: ParagraphIndexProcessor) -> None: + extract_setting = Mock() + expected_docs = [Document(page_content="chunk", metadata={})] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ExtractProcessor.extract" + ) as mock_extract: + mock_extract.return_value = expected_docs + docs = processor.extract(extract_setting, process_rule_mode="hierarchical") + + assert docs == expected_docs + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_validates_process_rule(self, processor: ParagraphIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_validates_segmentation(self, processor: ParagraphIndexProcessor, process_rule: dict) -> None: + rules_without_segmentation = SimpleNamespace(segmentation=None) + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=rules_without_segmentation, + ): + with pytest.raises(ValueError, match="No segmentation found in rules"): + processor.transform( + [Document(page_content="text", metadata={})], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + ) + + def test_transform_builds_split_documents(self, processor: ParagraphIndexProcessor, process_rule: dict) -> None: + source_document = Document(page_content="source", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".first", metadata={}), + Document(page_content=" ", metadata={}), + ] + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=self._rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.CleanProcessor.clean", + return_value=".first", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.remove_leading_symbols", + side_effect=lambda text: text.lstrip("."), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + ): + documents = processor.transform([source_document], process_rule=process_rule) + + assert len(documents) == 1 + assert documents[0].page_content == "first" + assert documents[0].attachments is not None + assert documents[0].metadata["doc_hash"] == "hash" + + def test_transform_automatic_mode_uses_default_rules(self, processor: ParagraphIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [Document(page_content="text", metadata={})] + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.Rule.model_validate", + return_value=self._rules(), + ) as mock_validate, + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.remove_leading_symbols", + side_effect=lambda text: text, + ), + patch.object(processor, "_get_content_files", return_value=[]), + ): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "automatic"}) + + assert mock_validate.call_count == 1 + + def test_load_creates_vector_and_multimodal_when_high_quality( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + docs = [Document(page_content="chunk", metadata={})] + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.load(dataset, docs, multimodal_documents=multimodal_docs) + vector = mock_vector_cls.return_value + vector.create.assert_called_once_with(docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + mock_keyword_cls.assert_not_called() + + def test_load_uses_keyword_add_texts_with_keywords_when_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.load(dataset, docs, keywords_list=["k1", "k2"]) + + mock_keyword_cls.return_value.add_texts.assert_called_once_with(docs, keywords_list=["k1", "k2"]) + + def test_load_uses_keyword_add_texts_without_keywords_when_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.load(dataset, docs) + + mock_keyword_cls.return_value.add_texts.assert_called_once_with(docs) + + def test_clean_deletes_summaries_and_vector(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + segment_query = Mock() + segment_query.filter.return_value.all.return_value = [SimpleNamespace(id="seg-1")] + session = Mock() + session.query.return_value = segment_query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + vector.delete_by_ids.assert_called_once_with(["node-1"]) + + def test_clean_economy_deletes_summaries_and_keywords( + self, processor: ParagraphIndexProcessor, dataset: Mock + ) -> None: + dataset.indexing_technique = "economy" + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + mock_keyword_cls.return_value.delete.assert_called_once() + + def test_clean_deletes_keywords_by_ids(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + dataset.indexing_technique = "economy" + with patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls: + processor.clean(dataset, ["node-2"], with_keywords=True) + + mock_keyword_cls.return_value.delete_by_ids.assert_called_once_with(["node-2"]) + + def test_retrieve_filters_by_threshold(self, processor: ParagraphIndexProcessor, dataset: Mock) -> None: + accepted = SimpleNamespace(page_content="keep", metadata={"source": "a"}, score=0.9) + rejected = SimpleNamespace(page_content="drop", metadata={"source": "b"}, score=0.1) + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.RetrievalService.retrieve" + ) as mock_retrieve: + mock_retrieve.return_value = [accepted, rejected] + reranking_model = {"reranking_provider_name": "", "reranking_model_name": ""} + docs = processor.retrieve("semantic_search", "query", dataset, 5, 0.5, reranking_model) + + assert len(docs) == 1 + assert docs[0].metadata["score"] == 0.9 + + def test_index_list_chunks_high_quality( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="img", metadata={})] + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore" + ) as mock_store_cls, + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector") as mock_vector_cls, + ): + processor.index(dataset, dataset_document, ["chunk-1", "chunk-2"]) + + mock_store_cls.return_value.add_documents.assert_called_once() + mock_vector_cls.return_value.create.assert_called_once() + mock_vector_cls.return_value.create_multimodal.assert_called_once() + + def test_index_list_chunks_economy( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + dataset.indexing_technique = "economy" + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object(processor, "_get_content_files", return_value=[]), + patch("core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.paragraph_index_processor.Keyword") as mock_keyword_cls, + ): + processor.index(dataset, dataset_document, ["chunk-3"]) + + mock_keyword_cls.return_value.add_texts.assert_called_once() + + def test_index_multimodal_structure_handles_files_and_account_lookup( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + chunk_with_files = SimpleNamespace( + content="content-1", + files=[SimpleNamespace(id="file-1", filename="image.png")], + ) + chunk_without_files = SimpleNamespace(content="content-2", files=None) + structure = SimpleNamespace(general_chunks=[chunk_with_files, chunk_without_files]) + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.MultimodalGeneralStructureChunk.model_validate", + return_value=structure, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.AccountService.load_user", + return_value=SimpleNamespace(id="user-1"), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="img", metadata={})] + ) as mock_files, + patch("core.rag.index_processor.processor.paragraph_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.paragraph_index_processor.Vector"), + ): + processor.index(dataset, dataset_document, {"general_chunks": []}) + + assert mock_files.call_count == 1 + + def test_index_multimodal_structure_requires_valid_account( + self, processor: ParagraphIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + structure = SimpleNamespace(general_chunks=[SimpleNamespace(content="content", files=None)]) + + with ( + patch( + "core.rag.index_processor.processor.paragraph_index_processor.MultimodalGeneralStructureChunk.model_validate", + return_value=structure, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.AccountService.load_user", + return_value=None, + ), + ): + with pytest.raises(ValueError, match="Invalid account"): + processor.index(dataset, dataset_document, {"general_chunks": []}) + + def test_format_preview_validates_chunk_shape(self, processor: ParagraphIndexProcessor) -> None: + preview = processor.format_preview(["chunk-1", "chunk-2"]) + assert preview["chunk_structure"] == "text_model" + assert preview["total_segments"] == 2 + + with pytest.raises(ValueError, match="Chunks is not a list"): + processor.format_preview({"not": "a-list"}) + + def test_generate_summary_preview_success_and_failure(self, processor: ParagraphIndexProcessor) -> None: + preview_items = [PreviewDetail(content="chunk-1"), PreviewDetail(content="chunk-2")] + + with patch.object(processor, "generate_summary", return_value=("summary", LLMUsage.empty_usage())): + result = processor.generate_summary_preview( + "tenant-1", preview_items, {"enable": True}, doc_language="English" + ) + assert all(item.summary == "summary" for item in result) + + with patch.object(processor, "generate_summary", side_effect=RuntimeError("summary failed")): + with pytest.raises(ValueError, match="Failed to generate summaries"): + processor.generate_summary_preview("tenant-1", [PreviewDetail(content="chunk-1")], {"enable": True}) + + def test_generate_summary_preview_fallback_without_flask_context(self, processor: ParagraphIndexProcessor) -> None: + preview_items = [PreviewDetail(content="chunk-1")] + fake_current_app = SimpleNamespace(_get_current_object=Mock(side_effect=RuntimeError("no app"))) + + with ( + patch("flask.current_app", fake_current_app), + patch.object(processor, "generate_summary", return_value=("summary", LLMUsage.empty_usage())), + ): + result = processor.generate_summary_preview("tenant-1", preview_items, {"enable": True}) + + assert result[0].summary == "summary" + + def test_generate_summary_preview_timeout( + self, processor: ParagraphIndexProcessor, fake_executor_cls: type + ) -> None: + preview_items = [PreviewDetail(content="chunk-1")] + future = Mock() + executor = fake_executor_cls(future) + + with ( + patch("concurrent.futures.ThreadPoolExecutor", return_value=executor), + patch("concurrent.futures.wait", side_effect=[(set(), {future}), (set(), set())]), + ): + with pytest.raises(ValueError, match="timeout"): + processor.generate_summary_preview("tenant-1", preview_items, {"enable": True}) + + future.cancel.assert_called_once() + + def test_generate_summary_validates_input(self) -> None: + with pytest.raises(ValueError, match="must be enabled"): + ParagraphIndexProcessor.generate_summary("tenant-1", "text", {"enable": False}) + + with pytest.raises(ValueError, match="model_name and model_provider_name"): + ParagraphIndexProcessor.generate_summary("tenant-1", "text", {"enable": True}) + + def test_generate_summary_text_only_flow(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace(features=[]) + model_instance.invoke_llm.return_value = self._llm_result("text summary") + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.deduct_llm_quota", + side_effect=RuntimeError("quota"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + summary, usage = ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + document_language="English", + ) + + assert summary == "text summary" + assert isinstance(usage, LLMUsage) + mock_logger.warning.assert_called_with("Failed to deduct quota for summary generation: %s", "quota") + + def test_generate_summary_handles_vision_and_image_conversion(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace( + features=[ModelFeature.VISION] + ) + model_instance.invoke_llm.return_value = self._llm_result("vision summary") + image_file = SimpleNamespace() + image_content = ImagePromptMessageContent(format="url", mime_type="image/png", url="http://example.com/a.png") + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch.object( + ParagraphIndexProcessor, "_extract_images_from_segment_attachments", return_value=[image_file] + ), + patch.object(ParagraphIndexProcessor, "_extract_images_from_text", return_value=[]) as mock_extract_text, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.file_manager.to_prompt_message_content", + return_value=image_content, + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.deduct_llm_quota"), + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + summary, _ = ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + segment_id="seg-1", + ) + + assert summary == "vision summary" + mock_extract_text.assert_not_called() + + def test_generate_summary_fallbacks_for_prompt_and_result_types(self) -> None: + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.model_type_instance.get_model_schema.return_value = SimpleNamespace( + features=[ModelFeature.VISION] + ) + model_instance.invoke_llm.return_value = object() + image_file = SimpleNamespace() + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.ProviderManager") as mock_pm_cls, + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ModelInstance", + return_value=model_instance, + ), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.DEFAULT_GENERATOR_SUMMARY_PROMPT", + "Prompt {missing}", + ), + patch.object(ParagraphIndexProcessor, "_extract_images_from_segment_attachments", return_value=[]), + patch.object(ParagraphIndexProcessor, "_extract_images_from_text", return_value=[image_file]), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.file_manager.to_prompt_message_content", + side_effect=RuntimeError("bad image"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + mock_pm_cls.return_value.get_provider_model_bundle.return_value = Mock() + with pytest.raises(ValueError, match="Expected LLMResult"): + ParagraphIndexProcessor.generate_summary( + "tenant-1", + "text content", + {"enable": True, "model_name": "model-a", "model_provider_name": "provider-a"}, + ) + + mock_logger.warning.assert_called_with( + "Failed to convert image file to prompt message content: %s", "bad image" + ) + + def test_extract_images_from_text_handles_patterns_and_build_errors(self) -> None: + text = ( + " " + " " + "" + ) + image_upload = SimpleNamespace( + id="11111111-1111-1111-1111-111111111111", + tenant_id="tenant-1", + name="image.png", + mime_type="image/png", + extension="png", + source_url="", + size=1, + key="key", + ) + non_image_upload = SimpleNamespace( + id="22222222-2222-2222-2222-222222222222", + tenant_id="tenant-1", + name="file.txt", + mime_type="text/plain", + extension="txt", + source_url="", + size=1, + key="key", + ) + query = Mock() + query.where.return_value.all.return_value = [image_upload, non_image_upload] + session = Mock() + session.query.return_value = query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.build_from_mapping", + return_value=SimpleNamespace(id="file-1"), + ) as mock_builder, + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_text("tenant-1", text) + + assert len(files) == 1 + assert mock_builder.call_count == 1 + mock_logger.warning.assert_not_called() + + def test_extract_images_from_text_returns_empty_when_no_matches(self) -> None: + assert ParagraphIndexProcessor._extract_images_from_text("tenant-1", "no images here") == [] + + def test_extract_images_from_text_logs_when_build_fails(self) -> None: + text = "" + image_upload = SimpleNamespace( + id="11111111-1111-1111-1111-111111111111", + tenant_id="tenant-1", + name="image.png", + mime_type="image/png", + extension="png", + source_url="", + size=1, + key="key", + ) + query = Mock() + query.where.return_value.all.return_value = [image_upload] + session = Mock() + session.query.return_value = query + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.build_from_mapping", + side_effect=RuntimeError("build failed"), + ), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_text("tenant-1", text) + + assert files == [] + mock_logger.warning.assert_called_once() + + def test_extract_images_from_segment_attachments(self) -> None: + image_upload = SimpleNamespace( + id="file-1", + name="image", + extension="png", + mime_type="image/png", + source_url="", + size=1, + key="k1", + ) + bad_upload = SimpleNamespace( + id="file-2", + name="broken", + extension=None, + mime_type="image/png", + source_url="", + size=1, + key="k2", + ) + non_image_upload = SimpleNamespace( + id="file-3", + name="text", + extension="txt", + mime_type="text/plain", + source_url="", + size=1, + key="k3", + ) + execute_result = Mock() + execute_result.all.return_value = [(None, image_upload), (None, bad_upload), (None, non_image_upload)] + session = Mock() + session.execute.return_value = execute_result + + with ( + patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session), + patch("core.rag.index_processor.processor.paragraph_index_processor.logger") as mock_logger, + ): + files = ParagraphIndexProcessor._extract_images_from_segment_attachments("tenant-1", "seg-1") + + assert len(files) == 1 + mock_logger.warning.assert_called_once() + + def test_extract_images_from_segment_attachments_empty(self) -> None: + execute_result = Mock() + execute_result.all.return_value = [] + session = Mock() + session.execute.return_value = execute_result + + with patch("core.rag.index_processor.processor.paragraph_index_processor.db.session", session): + empty_files = ParagraphIndexProcessor._extract_images_from_segment_attachments("tenant-1", "seg-1") + + assert empty_files == [] diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py new file mode 100644 index 0000000000..5c78cae7c1 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_parent_child_index_processor.py @@ -0,0 +1,524 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor +from core.rag.models.document import AttachmentDocument, ChildDocument, Document +from services.entities.knowledge_entities.knowledge_entities import ParentMode + + +class TestParentChildIndexProcessor: + @pytest.fixture + def processor(self) -> ParentChildIndexProcessor: + return ParentChildIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + document.dataset_process_rule_id = None + return document + + def _segmentation(self) -> SimpleNamespace: + return SimpleNamespace(max_tokens=200, chunk_overlap=10, separator="\n") + + def _paragraph_rules(self) -> SimpleNamespace: + return SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + segmentation=self._segmentation(), + subchunk_segmentation=self._segmentation(), + ) + + def _full_doc_rules(self) -> SimpleNamespace: + return SimpleNamespace( + parent_mode=ParentMode.FULL_DOC, segmentation=None, subchunk_segmentation=self._segmentation() + ) + + def test_extract_forwards_automatic_flag(self, processor: ParentChildIndexProcessor) -> None: + extract_setting = Mock() + expected = [Document(page_content="chunk", metadata={})] + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.ExtractProcessor.extract" + ) as mock_extract: + mock_extract.return_value = expected + documents = processor.extract(extract_setting, process_rule_mode="hierarchical") + + assert documents == expected + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_validates_process_rule(self, processor: ParentChildIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_paragraph_requires_segmentation(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(parent_mode=ParentMode.PARAGRAPH, segmentation=None) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", return_value=rules + ): + with pytest.raises(ValueError, match="No segmentation found in rules"): + processor.transform( + [Document(page_content="text", metadata={})], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + ) + + def test_transform_paragraph_builds_parent_and_child_docs(self, processor: ParentChildIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".parent", metadata={}), + Document(page_content=" ", metadata={}), + ] + parent_document = Document(page_content="source", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + child_docs = [ChildDocument(page_content="child-1", metadata={"dataset_id": "dataset-1"})] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._paragraph_rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.CleanProcessor.clean", + return_value=".parent", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + patch.object(processor, "_split_child_nodes", return_value=child_docs), + ): + result = processor.transform( + [parent_document], + process_rule={"mode": "custom", "rules": {"enabled": True}}, + preview=False, + ) + + assert len(result) == 1 + assert result[0].page_content == "parent" + assert result[0].children == child_docs + assert result[0].attachments is not None + + def test_transform_preview_returns_after_ten_parent_chunks(self, processor: ParentChildIndexProcessor) -> None: + splitter = Mock() + splitter.split_documents.return_value = [Document(page_content=f"chunk-{i}", metadata={}) for i in range(10)] + documents = [ + Document(page_content="doc-1", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + Document(page_content="doc-2", metadata={"dataset_id": "dataset-1", "document_id": "doc-2"}), + ] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._paragraph_rules(), + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch.object(processor, "_get_content_files", return_value=[]), + patch.object(processor, "_split_child_nodes", return_value=[]), + ): + result = processor.transform( + documents, + process_rule={"mode": "custom", "rules": {"enabled": True}}, + preview=True, + ) + + assert len(result) == 10 + + def test_transform_full_doc_mode_trims_children_for_preview(self, processor: ParentChildIndexProcessor) -> None: + docs = [ + Document(page_content="first", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + Document(page_content="second", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}), + ] + child_docs = [ChildDocument(page_content=f"child-{i}", metadata={}) for i in range(5)] + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.Rule.model_validate", + return_value=self._full_doc_rules(), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ), + patch.object(processor, "_split_child_nodes", return_value=child_docs), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.dify_config.CHILD_CHUNKS_PREVIEW_NUMBER", + 2, + ), + ): + result = processor.transform( + docs, + process_rule={"mode": "hierarchical", "rules": {"enabled": True}}, + preview=True, + ) + + assert len(result) == 1 + assert len(result[0].children or []) == 2 + assert result[0].attachments is not None + + def test_load_creates_vectors_for_child_docs(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + parent_doc = Document( + page_content="parent", + metadata={}, + children=[ + ChildDocument(page_content="child-1", metadata={}), + ChildDocument(page_content="child-2", metadata={}), + ], + ) + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls: + vector = mock_vector_cls.return_value + processor.load(dataset, [parent_doc], multimodal_documents=multimodal_docs) + + assert vector.create.call_count == 1 + formatted_docs = vector.create.call_args[0][0] + assert len(formatted_docs) == 2 + assert all(isinstance(doc, Document) for doc in formatted_docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + + def test_clean_with_precomputed_child_ids(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + delete_query = Mock() + where_query = Mock() + where_query.delete.return_value = 2 + session = Mock() + session.query.return_value.where.return_value = where_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean( + dataset, + ["node-1"], + delete_child_chunks=True, + precomputed_child_node_ids=["child-1", "child-2"], + ) + + vector.delete_by_ids.assert_called_once_with(["child-1", "child-2"]) + where_query.delete.assert_called_once_with(synchronize_session=False) + session.commit.assert_called_once() + + def test_clean_queries_child_ids_when_not_precomputed( + self, processor: ParentChildIndexProcessor, dataset: Mock + ) -> None: + child_query = Mock() + child_query.join.return_value.where.return_value.all.return_value = [("child-1",), (None,), ("child-2",)] + session = Mock() + session.query.return_value = child_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_child_chunks=False) + + vector.delete_by_ids.assert_called_once_with(["child-1", "child-2"]) + + def test_clean_dataset_wide_cleanup(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + where_query = Mock() + where_query.delete.return_value = 3 + session = Mock() + session.query.return_value.where.return_value = where_query + + with ( + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, None, delete_child_chunks=True) + + vector.delete.assert_called_once() + where_query.delete.assert_called_once_with(synchronize_session=False) + session.commit.assert_called_once() + + def test_clean_deletes_summaries_when_requested(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + segment_query = Mock() + segment_query.filter.return_value.all.return_value = [SimpleNamespace(id="seg-1")] + session = Mock() + session.query.return_value = segment_query + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.session_factory.create_session", + return_value=session_ctx, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + ): + processor.clean(dataset, ["node-1"], delete_summaries=True, precomputed_child_node_ids=[]) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + + def test_clean_deletes_all_summaries_when_node_ids_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock + ) -> None: + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + ): + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + + def test_retrieve_filters_by_score_threshold(self, processor: ParentChildIndexProcessor, dataset: Mock) -> None: + ok_result = SimpleNamespace(page_content="keep", metadata={"m": 1}, score=0.8) + low_result = SimpleNamespace(page_content="drop", metadata={"m": 2}, score=0.2) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.RetrievalService.retrieve" + ) as mock_retrieve: + mock_retrieve.return_value = [ok_result, low_result] + reranking_model = {"reranking_provider_name": "", "reranking_model_name": ""} + docs = processor.retrieve("semantic_search", "query", dataset, 3, 0.5, reranking_model) + + assert len(docs) == 1 + assert docs[0].page_content == "keep" + assert docs[0].metadata["score"] == 0.8 + + def test_split_child_nodes_requires_subchunk_segmentation(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(subchunk_segmentation=None) + + with pytest.raises(ValueError, match="No subchunk segmentation found"): + processor._split_child_nodes(Document(page_content="parent", metadata={}), rules, "custom", None) + + def test_split_child_nodes_generates_child_documents(self, processor: ParentChildIndexProcessor) -> None: + rules = SimpleNamespace(subchunk_segmentation=self._segmentation()) + splitter = Mock() + splitter.split_documents.return_value = [ + Document(page_content=".child-1", metadata={}), + Document(page_content=" ", metadata={}), + ] + + with ( + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + ): + child_docs = processor._split_child_nodes( + Document(page_content="parent", metadata={}), rules, "custom", None + ) + + assert len(child_docs) == 1 + assert child_docs[0].page_content == "child-1" + assert child_docs[0].metadata["doc_hash"] == "hash" + + def test_index_creates_process_rule_segments_and_vectors( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[ + SimpleNamespace( + parent_content="parent text", + child_contents=["child-1", "child-2"], + files=[SimpleNamespace(id="file-1", filename="image.png")], + ) + ], + ) + dataset_rule = SimpleNamespace(id="rule-1") + session = Mock() + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetProcessRule", + return_value=dataset_rule, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + side_effect=lambda text: f"hash-{text}", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetDocumentStore" + ) as mock_store_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector") as mock_vector_cls, + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + assert dataset_document.dataset_process_rule_id == "rule-1" + session.add.assert_called_once_with(dataset_rule) + session.flush.assert_called_once() + session.commit.assert_called_once() + mock_store_cls.return_value.add_documents.assert_called_once() + assert mock_vector_cls.return_value.create.call_count == 1 + mock_vector_cls.return_value.create_multimodal.assert_called_once() + + def test_index_uses_content_files_when_files_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child"], files=None)], + ) + dataset_rule = SimpleNamespace(id="rule-1") + session = Mock() + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.DatasetProcessRule", + return_value=dataset_rule, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.AccountService.load_user", + return_value=SimpleNamespace(id="user-1"), + ), + patch.object( + processor, "_get_content_files", return_value=[AttachmentDocument(page_content="image", metadata={})] + ) as mock_files, + patch("core.rag.index_processor.processor.parent_child_index_processor.DatasetDocumentStore"), + patch("core.rag.index_processor.processor.parent_child_index_processor.Vector"), + patch("core.rag.index_processor.processor.parent_child_index_processor.db.session", session), + ): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + mock_files.assert_called_once() + + def test_index_raises_when_account_missing( + self, processor: ParentChildIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child"], files=None)], + ) + + with ( + patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.helper.generate_text_hash", + return_value="hash", + ), + patch( + "core.rag.index_processor.processor.parent_child_index_processor.AccountService.load_user", + return_value=None, + ), + ): + with pytest.raises(ValueError, match="Invalid account"): + processor.index(dataset, dataset_document, {"parent_child_chunks": []}) + + def test_format_preview_returns_parent_child_structure(self, processor: ParentChildIndexProcessor) -> None: + parent_childs = SimpleNamespace( + parent_mode=ParentMode.PARAGRAPH, + parent_child_chunks=[SimpleNamespace(parent_content="parent", child_contents=["child-1", "child-2"])], + ) + + with patch( + "core.rag.index_processor.processor.parent_child_index_processor.ParentChildStructureChunk.model_validate", + return_value=parent_childs, + ): + preview = processor.format_preview({"parent_child_chunks": []}) + + assert preview["chunk_structure"] == "hierarchical_model" + assert preview["parent_mode"] == ParentMode.PARAGRAPH + assert preview["total_segments"] == 1 + + def test_generate_summary_preview_sets_summaries(self, processor: ParentChildIndexProcessor) -> None: + preview_texts = [PreviewDetail(content="chunk-1"), PreviewDetail(content="chunk-2")] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + return_value=("summary", None), + ): + result = processor.generate_summary_preview( + "tenant-1", preview_texts, {"enable": True}, doc_language="English" + ) + + assert all(item.summary == "summary" for item in result) + + def test_generate_summary_preview_raises_when_worker_fails(self, processor: ParentChildIndexProcessor) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + + with patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + side_effect=RuntimeError("summary failed"), + ): + with pytest.raises(ValueError, match="Failed to generate summaries"): + processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + def test_generate_summary_preview_falls_back_without_flask_context( + self, processor: ParentChildIndexProcessor + ) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + fake_current_app = SimpleNamespace(_get_current_object=Mock(side_effect=RuntimeError("no app"))) + + with ( + patch("flask.current_app", fake_current_app), + patch( + "core.rag.index_processor.processor.paragraph_index_processor.ParagraphIndexProcessor.generate_summary", + return_value=("summary", None), + ), + ): + result = processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + assert result[0].summary == "summary" + + def test_generate_summary_preview_handles_timeout( + self, processor: ParentChildIndexProcessor, fake_executor_cls: type + ) -> None: + preview_texts = [PreviewDetail(content="chunk-1")] + future = Mock() + executor = fake_executor_cls(future) + + with ( + patch("concurrent.futures.ThreadPoolExecutor", return_value=executor), + patch("concurrent.futures.wait", side_effect=[(set(), {future}), (set(), set())]), + ): + with pytest.raises(ValueError, match="timeout"): + processor.generate_summary_preview("tenant-1", preview_texts, {"enable": True}) + + future.cancel.assert_called_once() diff --git a/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py b/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py new file mode 100644 index 0000000000..99323eeec9 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/processor/test_qa_index_processor.py @@ -0,0 +1,383 @@ +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pandas as pd +import pytest +from werkzeug.datastructures import FileStorage + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor +from core.rag.models.document import AttachmentDocument, Document + + +class _ImmediateThread: + def __init__(self, target, args=(), kwargs=None): + self._target = target + self._args = args + self._kwargs = kwargs or {} + + def start(self) -> None: + self._target(*self._args, **self._kwargs) + + def join(self) -> None: + return None + + +class TestQAIndexProcessor: + @pytest.fixture + def processor(self) -> QAIndexProcessor: + return QAIndexProcessor() + + @pytest.fixture + def dataset(self) -> Mock: + dataset = Mock() + dataset.id = "dataset-1" + dataset.tenant_id = "tenant-1" + dataset.indexing_technique = "high_quality" + dataset.is_multimodal = True + return dataset + + @pytest.fixture + def dataset_document(self) -> Mock: + document = Mock() + document.id = "doc-1" + document.created_by = "user-1" + return document + + @pytest.fixture + def process_rule(self) -> dict: + return { + "mode": "custom", + "rules": {"segmentation": {"max_tokens": 256, "chunk_overlap": 10, "separator": "\n"}}, + } + + def _rules(self) -> SimpleNamespace: + segmentation = SimpleNamespace(max_tokens=256, chunk_overlap=10, separator="\n") + return SimpleNamespace(segmentation=segmentation) + + def test_extract_forwards_automatic_flag(self, processor: QAIndexProcessor) -> None: + extract_setting = Mock() + expected_docs = [Document(page_content="chunk", metadata={})] + + with patch("core.rag.index_processor.processor.qa_index_processor.ExtractProcessor.extract") as mock_extract: + mock_extract.return_value = expected_docs + + docs = processor.extract(extract_setting, process_rule_mode="automatic") + + assert docs == expected_docs + mock_extract.assert_called_once_with(extract_setting=extract_setting, is_automatic=True) + + def test_transform_rejects_none_process_rule(self, processor: QAIndexProcessor) -> None: + with pytest.raises(ValueError, match="No process rule found"): + processor.transform([Document(page_content="text", metadata={})], process_rule=None) + + def test_transform_rejects_missing_rules_key(self, processor: QAIndexProcessor) -> None: + with pytest.raises(ValueError, match="No rules found in process rule"): + processor.transform([Document(page_content="text", metadata={})], process_rule={"mode": "custom"}) + + def test_transform_preview_calls_formatter_once( + self, processor: QAIndexProcessor, process_rule: dict, fake_flask_app + ) -> None: + document = Document(page_content="raw text", metadata={"dataset_id": "dataset-1", "document_id": "doc-1"}) + split_node = Document(page_content=".question", metadata={}) + splitter = Mock() + splitter.split_documents.return_value = [split_node] + + def _append_document(flask_app, tenant_id, document_node, all_qa_documents, document_language): + all_qa_documents.append(Document(page_content="Q1", metadata={"answer": "A1"})) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.Rule.model_validate", return_value=self._rules() + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.qa_index_processor.CleanProcessor.clean", return_value="clean text" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.remove_leading_symbols", + side_effect=lambda text: text.lstrip("."), + ), + patch.object(processor, "_format_qa_document", side_effect=_append_document) as mock_format, + patch("core.rag.index_processor.processor.qa_index_processor.current_app") as mock_current_app, + ): + mock_current_app._get_current_object.return_value = fake_flask_app + result = processor.transform( + [document], + process_rule=process_rule, + preview=True, + tenant_id="tenant-1", + doc_language="English", + ) + + assert len(result) == 1 + assert result[0].metadata["answer"] == "A1" + mock_format.assert_called_once() + + def test_transform_non_preview_uses_thread_batches( + self, processor: QAIndexProcessor, process_rule: dict, fake_flask_app + ) -> None: + documents = [ + Document(page_content="doc-1", metadata={"document_id": "doc-1", "dataset_id": "dataset-1"}), + Document(page_content="doc-2", metadata={"document_id": "doc-2", "dataset_id": "dataset-1"}), + ] + split_node = Document(page_content="question", metadata={}) + splitter = Mock() + splitter.split_documents.return_value = [split_node] + + def _append_document(flask_app, tenant_id, document_node, all_qa_documents, document_language): + all_qa_documents.append(Document(page_content=f"Q-{document_node.page_content}", metadata={"answer": "A"})) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.Rule.model_validate", return_value=self._rules() + ), + patch.object(processor, "_get_splitter", return_value=splitter), + patch( + "core.rag.index_processor.processor.qa_index_processor.CleanProcessor.clean", + side_effect=lambda text, _: text, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.remove_leading_symbols", + side_effect=lambda text: text, + ), + patch.object(processor, "_format_qa_document", side_effect=_append_document) as mock_format, + patch("core.rag.index_processor.processor.qa_index_processor.current_app") as mock_current_app, + patch( + "core.rag.index_processor.processor.qa_index_processor.threading.Thread", side_effect=_ImmediateThread + ), + ): + mock_current_app._get_current_object.return_value = fake_flask_app + result = processor.transform(documents, process_rule=process_rule, preview=False, tenant_id="tenant-1") + + assert len(result) == 2 + assert mock_format.call_count == 2 + + def test_format_by_template_validates_file_type(self, processor: QAIndexProcessor) -> None: + not_csv_file = Mock(spec=FileStorage) + not_csv_file.filename = "qa.txt" + + with pytest.raises(ValueError, match="Only CSV files"): + processor.format_by_template(not_csv_file) + + def test_format_by_template_parses_csv_rows(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + dataframe = pd.DataFrame([["Q1", "A1"], ["Q2", "A2"]]) + + with patch("core.rag.index_processor.processor.qa_index_processor.pd.read_csv", return_value=dataframe): + docs = processor.format_by_template(csv_file) + + assert [doc.page_content for doc in docs] == ["Q1", "Q2"] + assert [doc.metadata["answer"] for doc in docs] == ["A1", "A2"] + + def test_format_by_template_raises_on_empty_csv(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + + with patch("core.rag.index_processor.processor.qa_index_processor.pd.read_csv", return_value=pd.DataFrame()): + with pytest.raises(ValueError, match="empty"): + processor.format_by_template(csv_file) + + def test_format_by_template_raises_on_invalid_csv(self, processor: QAIndexProcessor) -> None: + csv_file = Mock(spec=FileStorage) + csv_file.filename = "qa.csv" + + with patch( + "core.rag.index_processor.processor.qa_index_processor.pd.read_csv", side_effect=Exception("bad csv") + ): + with pytest.raises(ValueError, match="bad csv"): + processor.format_by_template(csv_file) + + def test_load_creates_vectors_for_high_quality_dataset(self, processor: QAIndexProcessor, dataset: Mock) -> None: + docs = [Document(page_content="Q1", metadata={"answer": "A1"})] + multimodal_docs = [AttachmentDocument(page_content="image", metadata={})] + + with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls: + vector = mock_vector_cls.return_value + processor.load(dataset, docs, multimodal_documents=multimodal_docs) + + vector.create.assert_called_once_with(docs) + vector.create_multimodal.assert_called_once_with(multimodal_docs) + + def test_load_skips_vector_for_non_high_quality(self, processor: QAIndexProcessor, dataset: Mock) -> None: + dataset.indexing_technique = "economy" + docs = [Document(page_content="Q1", metadata={"answer": "A1"})] + + with patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls: + processor.load(dataset, docs) + + mock_vector_cls.assert_not_called() + + def test_clean_handles_summary_deletion_and_vector_cleanup( + self, processor: QAIndexProcessor, dataset: Mock + ) -> None: + mock_segment = SimpleNamespace(id="seg-1") + mock_query = Mock() + mock_query.filter.return_value.all.return_value = [mock_segment] + mock_session = Mock() + mock_session.query.return_value = mock_query + session_context = MagicMock() + session_context.__enter__.return_value = mock_session + session_context.__exit__.return_value = False + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.session_factory.create_session", + return_value=session_context, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, ["node-1"], delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, ["seg-1"]) + vector.delete_by_ids.assert_called_once_with(["node-1"]) + + def test_clean_handles_dataset_wide_cleanup(self, processor: QAIndexProcessor, dataset: Mock) -> None: + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.SummaryIndexService.delete_summaries_for_segments" + ) as mock_summary, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + vector = mock_vector_cls.return_value + processor.clean(dataset, None, delete_summaries=True) + + mock_summary.assert_called_once_with(dataset, None) + vector.delete.assert_called_once() + + def test_retrieve_filters_by_score_threshold(self, processor: QAIndexProcessor, dataset: Mock) -> None: + result_ok = SimpleNamespace(page_content="accepted", metadata={"source": "a"}, score=0.9) + result_low = SimpleNamespace(page_content="rejected", metadata={"source": "b"}, score=0.1) + + with patch("core.rag.index_processor.processor.qa_index_processor.RetrievalService.retrieve") as mock_retrieve: + mock_retrieve.return_value = [result_ok, result_low] + reranking_model = {"reranking_provider_name": "", "reranking_model_name": ""} + docs = processor.retrieve("semantic_search", "query", dataset, 5, 0.5, reranking_model) + + assert len(docs) == 1 + assert docs[0].page_content == "accepted" + assert docs[0].metadata["score"] == 0.9 + + def test_index_adds_documents_and_vectors_for_high_quality( + self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + qa_chunks = SimpleNamespace( + qa_chunks=[ + SimpleNamespace(question="Q1", answer="A1"), + SimpleNamespace(question="Q2", answer="A2"), + ] + ) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch("core.rag.index_processor.processor.qa_index_processor.DatasetDocumentStore") as mock_store_cls, + patch("core.rag.index_processor.processor.qa_index_processor.Vector") as mock_vector_cls, + ): + processor.index(dataset, dataset_document, {"qa_chunks": []}) + + mock_store_cls.return_value.add_documents.assert_called_once() + mock_vector_cls.return_value.create.assert_called_once() + + def test_index_requires_high_quality( + self, processor: QAIndexProcessor, dataset: Mock, dataset_document: Mock + ) -> None: + dataset.indexing_technique = "economy" + qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")]) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + patch("core.rag.index_processor.processor.qa_index_processor.DatasetDocumentStore"), + ): + with pytest.raises(ValueError, match="must be high quality"): + processor.index(dataset, dataset_document, {"qa_chunks": []}) + + def test_format_preview_returns_qa_preview(self, processor: QAIndexProcessor) -> None: + qa_chunks = SimpleNamespace(qa_chunks=[SimpleNamespace(question="Q1", answer="A1")]) + + with patch( + "core.rag.index_processor.processor.qa_index_processor.QAStructureChunk.model_validate", + return_value=qa_chunks, + ): + preview = processor.format_preview({"qa_chunks": []}) + + assert preview["chunk_structure"] == "qa_model" + assert preview["total_segments"] == 1 + assert preview["qa_preview"] == [{"question": "Q1", "answer": "A1"}] + + def test_generate_summary_preview_returns_input(self, processor: QAIndexProcessor) -> None: + preview_items = [PreviewDetail(content="Q1")] + assert processor.generate_summary_preview("tenant-1", preview_items, {}) is preview_items + + def test_format_qa_document_ignores_blank_text(self, processor: QAIndexProcessor, fake_flask_app) -> None: + all_qa_documents: list[Document] = [] + blank_document = Document(page_content=" ", metadata={}) + + processor._format_qa_document(fake_flask_app, "tenant-1", blank_document, all_qa_documents, "English") + + assert all_qa_documents == [] + + def test_format_qa_document_builds_question_answer_documents( + self, processor: QAIndexProcessor, fake_flask_app + ) -> None: + all_qa_documents: list[Document] = [] + source_document = Document(page_content="source text", metadata={"origin": "doc-1"}) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.LLMGenerator.generate_qa_document", + return_value="Q1: What is this?\nA1: A test.\nQ2: Why?\nA2: Coverage.", + ), + patch( + "core.rag.index_processor.processor.qa_index_processor.helper.generate_text_hash", return_value="hash" + ), + ): + processor._format_qa_document(fake_flask_app, "tenant-1", source_document, all_qa_documents, "English") + + assert len(all_qa_documents) == 2 + assert all_qa_documents[0].page_content == "What is this?" + assert all_qa_documents[0].metadata["answer"] == "A test." + assert all_qa_documents[1].metadata["answer"] == "Coverage." + + def test_format_qa_document_logs_errors(self, processor: QAIndexProcessor, fake_flask_app) -> None: + all_qa_documents: list[Document] = [] + source_document = Document(page_content="source text", metadata={"origin": "doc-1"}) + + with ( + patch( + "core.rag.index_processor.processor.qa_index_processor.LLMGenerator.generate_qa_document", + side_effect=RuntimeError("llm failure"), + ), + patch("core.rag.index_processor.processor.qa_index_processor.logger") as mock_logger, + ): + processor._format_qa_document(fake_flask_app, "tenant-1", source_document, all_qa_documents, "English") + + assert all_qa_documents == [] + mock_logger.exception.assert_called_once_with("Failed to format qa document") + + def test_format_split_text_extracts_question_answer_pairs(self, processor: QAIndexProcessor) -> None: + parsed = processor._format_split_text("Q1: First?\nA1: One.\nQ2: Second?\nA2: Two.\n") + + assert parsed == [{"question": "First?", "answer": "One."}, {"question": "Second?", "answer": "Two."}] diff --git a/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py b/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py new file mode 100644 index 0000000000..b31bb6eea7 --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/test_index_processor_base.py @@ -0,0 +1,291 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import httpx +import pytest + +from core.entities.knowledge_entities import PreviewDetail +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.index_processor_base import BaseIndexProcessor +from core.rag.models.document import AttachmentDocument, Document + + +class _ForwardingBaseIndexProcessor(BaseIndexProcessor): + def extract(self, extract_setting, **kwargs): + return super().extract(extract_setting, **kwargs) + + def transform(self, documents, current_user=None, **kwargs): + return super().transform(documents, current_user=current_user, **kwargs) + + def generate_summary_preview(self, tenant_id, preview_texts, summary_index_setting, doc_language=None): + return super().generate_summary_preview( + tenant_id=tenant_id, + preview_texts=preview_texts, + summary_index_setting=summary_index_setting, + doc_language=doc_language, + ) + + def load(self, dataset, documents, multimodal_documents=None, with_keywords=True, **kwargs): + return super().load( + dataset=dataset, + documents=documents, + multimodal_documents=multimodal_documents, + with_keywords=with_keywords, + **kwargs, + ) + + def clean(self, dataset, node_ids, with_keywords=True, **kwargs): + return super().clean(dataset=dataset, node_ids=node_ids, with_keywords=with_keywords, **kwargs) + + def index(self, dataset, document, chunks): + return super().index(dataset=dataset, document=document, chunks=chunks) + + def format_preview(self, chunks): + return super().format_preview(chunks) + + def retrieve(self, retrieval_method, query, dataset, top_k, score_threshold, reranking_model): + return super().retrieve( + retrieval_method=retrieval_method, + query=query, + dataset=dataset, + top_k=top_k, + score_threshold=score_threshold, + reranking_model=reranking_model, + ) + + +class TestBaseIndexProcessor: + @pytest.fixture + def processor(self) -> _ForwardingBaseIndexProcessor: + return _ForwardingBaseIndexProcessor() + + def test_abstract_methods_raise_not_implemented(self, processor: _ForwardingBaseIndexProcessor) -> None: + with pytest.raises(NotImplementedError): + processor.extract(Mock()) + with pytest.raises(NotImplementedError): + processor.transform([]) + with pytest.raises(NotImplementedError): + processor.generate_summary_preview("tenant", [PreviewDetail(content="c")], {}) + with pytest.raises(NotImplementedError): + processor.load(Mock(), []) + with pytest.raises(NotImplementedError): + processor.clean(Mock(), None) + with pytest.raises(NotImplementedError): + processor.index(Mock(), Mock(), {}) + with pytest.raises(NotImplementedError): + processor.format_preview([]) + with pytest.raises(NotImplementedError): + processor.retrieve("semantic_search", "q", Mock(), 3, 0.5, {}) + + def test_get_splitter_validates_custom_length(self, processor: _ForwardingBaseIndexProcessor) -> None: + with patch( + "core.rag.index_processor.index_processor_base.dify_config.INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH", 1000 + ): + with pytest.raises(ValueError, match="between 50 and 1000"): + processor._get_splitter("custom", 49, 0, "", None) + with pytest.raises(ValueError, match="between 50 and 1000"): + processor._get_splitter("custom", 1001, 0, "", None) + + def test_get_splitter_custom_mode_uses_fixed_splitter(self, processor: _ForwardingBaseIndexProcessor) -> None: + fixed_splitter = Mock() + with patch( + "core.rag.index_processor.index_processor_base.FixedRecursiveCharacterTextSplitter.from_encoder", + return_value=fixed_splitter, + ) as mock_fixed: + splitter = processor._get_splitter("hierarchical", 120, 10, "\\n\\n", None) + + assert splitter is fixed_splitter + assert mock_fixed.call_args.kwargs["fixed_separator"] == "\n\n" + assert mock_fixed.call_args.kwargs["chunk_size"] == 120 + + def test_get_splitter_automatic_mode_uses_enhance_splitter(self, processor: _ForwardingBaseIndexProcessor) -> None: + auto_splitter = Mock() + with patch( + "core.rag.index_processor.index_processor_base.EnhanceRecursiveCharacterTextSplitter.from_encoder", + return_value=auto_splitter, + ) as mock_enhance: + splitter = processor._get_splitter("automatic", 0, 0, "", None) + + assert splitter is auto_splitter + assert "chunk_size" in mock_enhance.call_args.kwargs + + def test_extract_markdown_images(self, processor: _ForwardingBaseIndexProcessor) -> None: + markdown = "text  and " + images = processor._extract_markdown_images(markdown) + assert images == ["https://a/img.png", "/files/123/file-preview"] + + def test_get_content_files_without_images_returns_empty(self, processor: _ForwardingBaseIndexProcessor) -> None: + document = Document(page_content="no image markdown", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + assert processor._get_content_files(document) == [] + + def test_get_content_files_handles_all_sources_and_duplicates( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = [ + "/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview", + "/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview", + "/files/bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb/file-preview", + "/files/tools/cccccccc-cccc-cccc-cccc-cccccccccccc.png", + "https://example.com/remote.png?x=1", + ] + upload_a = SimpleNamespace(id="aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa", name="a.png") + upload_b = SimpleNamespace(id="bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb", name="b.png") + upload_tool = SimpleNamespace(id="tool-upload-id", name="tool.png") + upload_remote = SimpleNamespace(id="remote-upload-id", name="remote.png") + db_query = Mock() + db_query.where.return_value.all.return_value = [upload_a, upload_b, upload_tool, upload_remote] + db_session = Mock() + db_session.query.return_value = db_query + + with ( + patch.object(processor, "_extract_markdown_images", return_value=images), + patch.object(processor, "_download_tool_file", return_value="tool-upload-id") as mock_tool_download, + patch.object(processor, "_download_image", return_value="remote-upload-id") as mock_image_download, + patch("core.rag.index_processor.index_processor_base.db.session", db_session), + ): + files = processor._get_content_files(document, current_user=Mock()) + + assert len(files) == 5 + assert all(isinstance(file, AttachmentDocument) for file in files) + assert files[0].metadata["doc_type"] == DocType.IMAGE + assert files[0].metadata["document_id"] == "doc-1" + assert files[0].metadata["dataset_id"] == "ds-1" + assert files[0].metadata["doc_id"] == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + assert files[1].metadata["doc_id"] == "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa" + mock_tool_download.assert_called_once() + mock_image_download.assert_called_once() + + def test_get_content_files_skips_tool_and_remote_download_without_user( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = ["/files/tools/cccccccc-cccc-cccc-cccc-cccccccccccc.png", "https://example.com/remote.png"] + + with patch.object(processor, "_extract_markdown_images", return_value=images): + files = processor._get_content_files(document, current_user=None) + + assert files == [] + + def test_get_content_files_ignores_missing_upload_records(self, processor: _ForwardingBaseIndexProcessor) -> None: + document = Document(page_content="ignored", metadata={"document_id": "doc-1", "dataset_id": "ds-1"}) + images = ["/files/aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa/image-preview"] + db_query = Mock() + db_query.where.return_value.all.return_value = [] + db_session = Mock() + db_session.query.return_value = db_query + + with ( + patch.object(processor, "_extract_markdown_images", return_value=images), + patch("core.rag.index_processor.index_processor_base.db.session", db_session), + ): + files = processor._get_content_files(document) + + assert files == [] + + def test_download_image_success_with_filename_from_content_disposition( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + response = Mock() + response.headers = { + "Content-Length": "4", + "content-disposition": "attachment; filename=test-image.png", + "content-type": "image/png", + } + response.raise_for_status.return_value = None + response.iter_bytes.return_value = [b"data"] + upload_result = SimpleNamespace(id="upload-id") + + mock_db = Mock() + mock_db.engine = Mock() + + with ( + patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=response), + patch("core.rag.index_processor.index_processor_base.db", mock_db), + patch("services.file_service.FileService") as mock_file_service, + ): + mock_file_service.return_value.upload_file.return_value = upload_result + upload_id = processor._download_image("https://example.com/test.png", current_user=Mock()) + + assert upload_id == "upload-id" + mock_file_service.return_value.upload_file.assert_called_once() + + def test_download_image_validates_size_and_empty_content(self, processor: _ForwardingBaseIndexProcessor) -> None: + too_large = Mock() + too_large.headers = {"Content-Length": str(3 * 1024 * 1024), "content-type": "image/png"} + too_large.raise_for_status.return_value = None + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=too_large): + assert processor._download_image("https://example.com/too-large.png", current_user=Mock()) is None + + empty = Mock() + empty.headers = {"Content-Length": "0", "content-type": "image/png"} + empty.raise_for_status.return_value = None + empty.iter_bytes.return_value = [] + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=empty): + assert processor._download_image("https://example.com/empty.png", current_user=Mock()) is None + + def test_download_image_limits_stream_size(self, processor: _ForwardingBaseIndexProcessor) -> None: + response = Mock() + response.headers = {"content-type": "image/png"} + response.raise_for_status.return_value = None + response.iter_bytes.return_value = [b"a" * (3 * 1024 * 1024)] + + with patch("core.rag.index_processor.index_processor_base.ssrf_proxy.get", return_value=response): + assert processor._download_image("https://example.com/big-stream.png", current_user=Mock()) is None + + def test_download_image_handles_timeout_request_and_unexpected_errors( + self, processor: _ForwardingBaseIndexProcessor + ) -> None: + request = httpx.Request("GET", "https://example.com/image.png") + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=httpx.TimeoutException("timeout"), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=httpx.RequestError("bad request", request=request), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + with patch( + "core.rag.index_processor.index_processor_base.ssrf_proxy.get", + side_effect=RuntimeError("unexpected"), + ): + assert processor._download_image("https://example.com/image.png", current_user=Mock()) is None + + def test_download_tool_file_returns_none_when_not_found(self, processor: _ForwardingBaseIndexProcessor) -> None: + db_query = Mock() + db_query.where.return_value.first.return_value = None + db_session = Mock() + db_session.query.return_value = db_query + + with patch("core.rag.index_processor.index_processor_base.db.session", db_session): + assert processor._download_tool_file("tool-id", current_user=Mock()) is None + + def test_download_tool_file_uploads_file_when_found(self, processor: _ForwardingBaseIndexProcessor) -> None: + tool_file = SimpleNamespace(file_key="k1", name="tool.png", mimetype="image/png") + db_query = Mock() + db_query.where.return_value.first.return_value = tool_file + db_session = Mock() + db_session.query.return_value = db_query + mock_db = Mock() + mock_db.session = db_session + mock_db.engine = Mock() + upload_result = SimpleNamespace(id="upload-id") + + with ( + patch("core.rag.index_processor.index_processor_base.db", mock_db), + patch("core.rag.index_processor.index_processor_base.storage.load_once", return_value=b"blob") as mock_load, + patch("services.file_service.FileService") as mock_file_service, + ): + mock_file_service.return_value.upload_file.return_value = upload_result + result = processor._download_tool_file("tool-id", current_user=Mock()) + + assert result == "upload-id" + mock_load.assert_called_once_with("k1") + mock_file_service.return_value.upload_file.assert_called_once() diff --git a/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py b/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py new file mode 100644 index 0000000000..0fc666dbbf --- /dev/null +++ b/api/tests/unit_tests/core/rag/indexing/test_index_processor_factory.py @@ -0,0 +1,42 @@ +import pytest + +from core.rag.index_processor.constant.index_type import IndexStructureType +from core.rag.index_processor.index_processor_factory import IndexProcessorFactory +from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor +from core.rag.index_processor.processor.parent_child_index_processor import ParentChildIndexProcessor +from core.rag.index_processor.processor.qa_index_processor import QAIndexProcessor + + +class TestIndexProcessorFactory: + def test_requires_index_type(self) -> None: + factory = IndexProcessorFactory(index_type=None) + + with pytest.raises(ValueError, match="Index type must be specified"): + factory.init_index_processor() + + def test_builds_paragraph_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.PARAGRAPH_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, ParagraphIndexProcessor) + + def test_builds_qa_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.QA_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, QAIndexProcessor) + + def test_builds_parent_child_processor(self) -> None: + factory = IndexProcessorFactory(index_type=IndexStructureType.PARENT_CHILD_INDEX) + + processor = factory.init_index_processor() + + assert isinstance(processor, ParentChildIndexProcessor) + + def test_rejects_unsupported_index_type(self) -> None: + factory = IndexProcessorFactory(index_type="unsupported") + + with pytest.raises(ValueError, match="is not supported"): + factory.init_index_processor() diff --git a/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py b/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py index c00fee8fe5..b011ade884 100644 --- a/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py +++ b/api/tests/unit_tests/core/rag/indexing/test_indexing_runner.py @@ -61,9 +61,9 @@ from core.indexing_runner import ( DocumentIsPausedError, IndexingRunner, ) -from core.model_runtime.entities.model_entities import ModelType from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.models.document import ChildDocument, Document +from dify_graph.model_runtime.entities.model_entities import ModelType from libs.datetime_utils import naive_utc_now from models.dataset import Dataset, DatasetProcessRule from models.dataset import Document as DatasetDocument diff --git a/api/tests/unit_tests/core/rag/rerank/test_reranker.py b/api/tests/unit_tests/core/rag/rerank/test_reranker.py index e4597e7f8c..b150d677f1 100644 --- a/api/tests/unit_tests/core/rag/rerank/test_reranker.py +++ b/api/tests/unit_tests/core/rag/rerank/test_reranker.py @@ -12,21 +12,26 @@ All tests use mocking to avoid external dependencies and ensure fast, reliable e Tests follow the Arrange-Act-Assert pattern for clarity. """ +from operator import itemgetter +from types import SimpleNamespace from unittest.mock import MagicMock, Mock, patch import pytest from core.model_manager import ModelInstance -from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.constant.query_type import QueryType from core.rag.models.document import Document from core.rag.rerank.entity.weight import KeywordSetting, VectorSetting, Weights +from core.rag.rerank.rerank_base import BaseRerankRunner from core.rag.rerank.rerank_factory import RerankRunnerFactory from core.rag.rerank.rerank_model import RerankModelRunner from core.rag.rerank.rerank_type import RerankMode from core.rag.rerank.weight_rerank import WeightRerankRunner +from dify_graph.model_runtime.entities.rerank_entities import RerankDocument, RerankResult -def create_mock_model_instance(): +def create_mock_model_instance() -> ModelInstance: """Create a properly configured mock ModelInstance for reranking tests.""" mock_instance = Mock(spec=ModelInstance) # Setup provider_model_bundle chain for check_model_support_vision @@ -59,14 +64,7 @@ class TestRerankModelRunner: @pytest.fixture def mock_model_instance(self): """Create a mock ModelInstance for reranking.""" - mock_instance = Mock(spec=ModelInstance) - # Setup provider_model_bundle chain for check_model_support_vision - mock_instance.provider_model_bundle = Mock() - mock_instance.provider_model_bundle.configuration = Mock() - mock_instance.provider_model_bundle.configuration.tenant_id = "test-tenant-id" - mock_instance.provider = "test-provider" - mock_instance.model_name = "test-model" - return mock_instance + return create_mock_model_instance() @pytest.fixture def rerank_runner(self, mock_model_instance): @@ -382,6 +380,206 @@ class TestRerankModelRunner: assert call_kwargs["user"] == "user123" +class _ForwardingBaseRerankRunner(BaseRerankRunner): + def run( + self, + query: str, + documents: list[Document], + score_threshold: float | None = None, + top_n: int | None = None, + user: str | None = None, + query_type: QueryType = QueryType.TEXT_QUERY, + ) -> list[Document]: + return super().run( + query=query, + documents=documents, + score_threshold=score_threshold, + top_n=top_n, + user=user, + query_type=query_type, + ) + + +class TestBaseRerankRunner: + def test_run_raises_not_implemented(self): + runner = _ForwardingBaseRerankRunner() + + with pytest.raises(NotImplementedError): + runner.run(query="python", documents=[]) + + +class TestRerankModelRunnerMultimodal: + @pytest.fixture + def mock_model_instance(self): + return create_mock_model_instance() + + @pytest.fixture + def rerank_runner(self, mock_model_instance): + return RerankModelRunner(rerank_model_instance=mock_model_instance) + + def test_run_returns_original_documents_for_non_text_query_without_vision_support( + self, rerank_runner, mock_model_instance + ): + documents = [ + Document(page_content="doc", metadata={"doc_id": "doc1"}, provider="dify"), + ] + + with patch("core.rag.rerank.rerank_model.ModelManager") as mock_mm: + mock_mm.return_value.check_model_support_vision.return_value = False + result = rerank_runner.run(query="image-file-id", documents=documents, query_type=QueryType.IMAGE_QUERY) + + assert result == documents + mock_model_instance.invoke_rerank.assert_not_called() + + def test_run_uses_multimodal_path_when_vision_support_is_enabled(self, rerank_runner): + documents = [ + Document(page_content="doc", metadata={"doc_id": "doc1", "source": "wiki"}, provider="dify"), + ] + rerank_result = RerankResult( + model="rerank-model", + docs=[RerankDocument(index=0, text="doc", score=0.88)], + ) + + with ( + patch("core.rag.rerank.rerank_model.ModelManager") as mock_mm, + patch.object( + rerank_runner, + "fetch_multimodal_rerank", + return_value=(rerank_result, documents), + ) as mock_multimodal, + ): + mock_mm.return_value.check_model_support_vision.return_value = True + result = rerank_runner.run(query="python", documents=documents, query_type=QueryType.TEXT_QUERY) + + mock_multimodal.assert_called_once() + assert len(result) == 1 + assert result[0].metadata["score"] == 0.88 + + def test_fetch_multimodal_rerank_builds_docs_and_calls_text_rerank(self, rerank_runner): + image_doc = Document( + page_content="image-content", + metadata={"doc_id": "img-1", "doc_type": DocType.IMAGE}, + provider="dify", + ) + text_doc = Document( + page_content="text-content", + metadata={"doc_id": "txt-1", "doc_type": DocType.TEXT}, + provider="dify", + ) + external_doc = Document( + page_content="external-content", + metadata={}, + provider="external", + ) + query = Mock() + query.where.return_value.first.return_value = SimpleNamespace(key="image-key") + rerank_result = RerankResult(model="rerank-model", docs=[]) + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query), + patch("core.rag.rerank.rerank_model.storage.load_once", return_value=b"image-bytes") as mock_load_once, + patch.object( + rerank_runner, + "fetch_text_rerank", + return_value=(rerank_result, [image_doc, text_doc, external_doc]), + ) as mock_text_rerank, + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[image_doc, text_doc, external_doc, external_doc], + query_type=QueryType.TEXT_QUERY, + ) + + assert result == rerank_result + assert len(unique_documents) == 3 + mock_load_once.assert_called_once_with("image-key") + text_rerank_call_args = mock_text_rerank.call_args.args + assert len(text_rerank_call_args[1]) == 3 + + def test_fetch_multimodal_rerank_skips_missing_image_upload(self, rerank_runner): + image_doc = Document( + page_content="image-content", + metadata={"doc_id": "img-missing", "doc_type": DocType.IMAGE}, + provider="dify", + ) + query = Mock() + query.where.return_value.first.return_value = None + rerank_result = RerankResult(model="rerank-model", docs=[]) + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query), + patch.object( + rerank_runner, + "fetch_text_rerank", + return_value=(rerank_result, [image_doc]), + ) as mock_text_rerank, + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[image_doc], + query_type=QueryType.TEXT_QUERY, + ) + + assert result == rerank_result + assert unique_documents == [image_doc] + docs_arg = mock_text_rerank.call_args.args[1] + assert len(docs_arg) == 1 + + def test_fetch_multimodal_rerank_image_query_invokes_multimodal_model(self, rerank_runner, mock_model_instance): + text_doc = Document( + page_content="text-content", + metadata={"doc_id": "txt-1", "doc_type": DocType.TEXT}, + provider="dify", + ) + query_chain = Mock() + query_chain.where.return_value.first.return_value = SimpleNamespace(key="query-image-key") + rerank_result = RerankResult( + model="rerank-model", + docs=[RerankDocument(index=0, text="text-content", score=0.77)], + ) + mock_model_instance.invoke_multimodal_rerank.return_value = rerank_result + + with ( + patch("core.rag.rerank.rerank_model.db.session.query", return_value=query_chain), + patch("core.rag.rerank.rerank_model.storage.load_once", return_value=b"query-image-bytes"), + ): + result, unique_documents = rerank_runner.fetch_multimodal_rerank( + query="query-upload-id", + documents=[text_doc], + score_threshold=0.2, + top_n=2, + user="user-1", + query_type=QueryType.IMAGE_QUERY, + ) + + assert result == rerank_result + assert unique_documents == [text_doc] + invoke_kwargs = mock_model_instance.invoke_multimodal_rerank.call_args.kwargs + assert invoke_kwargs["query"]["content_type"] == DocType.IMAGE + assert invoke_kwargs["docs"][0]["content"] == "text-content" + assert invoke_kwargs["user"] == "user-1" + + def test_fetch_multimodal_rerank_raises_when_query_image_not_found(self, rerank_runner): + query_chain = Mock() + query_chain.where.return_value.first.return_value = None + + with patch("core.rag.rerank.rerank_model.db.session.query", return_value=query_chain): + with pytest.raises(ValueError, match="Upload file not found for query"): + rerank_runner.fetch_multimodal_rerank( + query="missing-upload-id", + documents=[], + query_type=QueryType.IMAGE_QUERY, + ) + + def test_fetch_multimodal_rerank_rejects_unsupported_query_type(self, rerank_runner): + with pytest.raises(ValueError, match="is not supported"): + rerank_runner.fetch_multimodal_rerank( + query="python", + documents=[], + query_type="unsupported_query_type", + ) + + class TestWeightRerankRunner: """Unit tests for WeightRerankRunner. @@ -512,34 +710,39 @@ class TestWeightRerankRunner: - TF-IDF scores are calculated correctly - Cosine similarity is computed for keyword vectors """ - # Arrange: Create runner runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - - # Mock keyword extraction with specific keywords + keyword_map = { + "python programming": ["python", "programming"], + "Python is a programming language": ["python", "programming", "language"], + "JavaScript for web development": ["javascript", "web"], + "Java object-oriented programming": ["java", "programming"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.side_effect = [ - ["python", "programming"], # query - ["python", "programming", "language"], # doc1 - ["javascript", "web"], # doc2 - ["java", "programming"], # doc3 - ] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2, 0.3, 0.4] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("python programming", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "python programming", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="python programming", documents=sample_documents_with_vectors) - # Assert: Keywords are extracted and scores are calculated - assert len(result) == 3 - # Document 1 should have highest keyword score (matches both query terms) - # Document 3 should have medium score (matches one term) - # Document 2 should have lowest score (matches no terms) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_vector_score_calculation( self, @@ -556,30 +759,42 @@ class TestWeightRerankRunner: - Cosine similarity is calculated with document vectors - Vector scores are properly normalized """ - # Arrange: Create runner runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test query": ["test"], + "Python is a programming language": ["python"], + "JavaScript for web development": ["javascript"], + "Java object-oriented programming": ["java"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding model mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance - # Mock cache embedding with specific query vector mock_cache_instance = MagicMock() query_vector = [0.2, 0.3, 0.4, 0.5] mock_cache_instance.embed_query.return_value = query_vector mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test query", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "test query", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="test query", documents=sample_documents_with_vectors) - # Assert: Vector scores are calculated - assert len(result) == 3 - # Verify cosine similarity was computed (doc2 vector is closest to query vector) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_score_threshold_filtering_weighted( self, @@ -742,28 +957,40 @@ class TestWeightRerankRunner: - Keyword weight (0.4) is applied to keyword scores - Combined score is the sum of weighted components """ - # Arrange: Create runner with known weights runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test": ["test"], + "Python is a programming language": ["python", "language"], + "JavaScript for web development": ["javascript", "web"], + "Java object-oriented programming": ["java", "programming"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2, 0.3, 0.4] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test", sample_documents_with_vectors) + vector_scores = runner._calculate_cosine( + "tenant123", "test", sample_documents_with_vectors, weights_config.vector_setting + ) + expected_scores = { + doc.metadata["doc_id"]: (0.6 * vector_score + 0.4 * query_score) + for doc, query_score, vector_score in zip(sample_documents_with_vectors, query_scores, vector_scores) + } + result = runner.run(query="test", documents=sample_documents_with_vectors) - # Assert: Scores are combined with weights - # Score = 0.6 * vector_score + 0.4 * keyword_score - assert len(result) == 3 - assert all("score" in doc.metadata for doc in result) + expected_order = [doc_id for doc_id, _ in sorted(expected_scores.items(), key=itemgetter(1), reverse=True)] + assert [doc.metadata["doc_id"] for doc in result] == expected_order + for doc in result: + doc_id = doc.metadata["doc_id"] + assert doc.metadata["score"] == pytest.approx(expected_scores[doc_id], rel=1e-6) def test_existing_vector_score_in_metadata( self, @@ -778,7 +1005,6 @@ class TestWeightRerankRunner: - If document already has a score in metadata, it's used - Cosine similarity calculation is skipped for such documents """ - # Arrange: Documents with pre-existing scores documents = [ Document( page_content="Content with existing score", @@ -790,24 +1016,29 @@ class TestWeightRerankRunner: runner = WeightRerankRunner(tenant_id="tenant123", weights=weights_config) - # Mock keyword extraction + keyword_map = { + "test": ["test"], + "Content with existing score": ["test"], + } mock_handler_instance = MagicMock() - mock_handler_instance.extract_keywords.return_value = ["test"] + mock_handler_instance.extract_keywords.side_effect = lambda text, _: keyword_map[text] mock_jieba_handler.return_value = mock_handler_instance - # Mock embedding mock_embedding_instance = MagicMock() mock_model_manager.return_value.get_model_instance.return_value = mock_embedding_instance mock_cache_instance = MagicMock() mock_cache_instance.embed_query.return_value = [0.1, 0.2] mock_cache_embedding.return_value = mock_cache_instance - # Act: Run reranking + query_scores = runner._calculate_keyword_score("test", documents) + vector_scores = runner._calculate_cosine("tenant123", "test", documents, weights_config.vector_setting) + expected_score = 0.6 * vector_scores[0] + 0.4 * query_scores[0] + result = runner.run(query="test", documents=documents) - # Assert: Existing score is used in calculation assert len(result) == 1 - # The final score should incorporate the existing score (0.95) with vector weight (0.6) + assert result[0].metadata["doc_id"] == "doc1" + assert result[0].metadata["score"] == pytest.approx(expected_score, rel=1e-6) class TestRerankRunnerFactory: diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py index ca08cb0591..665e98bd9c 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py @@ -1,80 +1,42 @@ -""" -Unit tests for dataset retrieval functionality. - -This module provides comprehensive test coverage for the RetrievalService class, -which is responsible for retrieving relevant documents from datasets using various -search strategies. - -Core Retrieval Mechanisms Tested: -================================== -1. **Vector Search (Semantic Search)** - - Uses embedding vectors to find semantically similar documents - - Supports score thresholds and top-k limiting - - Can filter by document IDs and metadata - -2. **Keyword Search** - - Traditional text-based search using keyword matching - - Handles special characters and query escaping - - Supports document filtering - -3. **Full-Text Search** - - BM25-based full-text search for text matching - - Used in hybrid search scenarios - -4. **Hybrid Search** - - Combines vector and full-text search results - - Implements deduplication to avoid duplicate chunks - - Uses DataPostProcessor for score merging with configurable weights - -5. **Score Merging Algorithms** - - Deduplication based on doc_id - - Retains higher-scoring duplicates - - Supports weighted score combination - -6. **Metadata Filtering** - - Filters documents based on metadata conditions - - Supports document ID filtering - -Test Architecture: -================== -- **Fixtures**: Provide reusable mock objects (datasets, documents, Flask app) -- **Mocking Strategy**: Mock at the method level (embedding_search, keyword_search, etc.) - rather than at the class level to properly simulate the ThreadPoolExecutor behavior -- **Pattern**: All tests follow Arrange-Act-Assert (AAA) pattern -- **Isolation**: Each test is independent and doesn't rely on external state - -Running Tests: -============== - # Run all tests in this module - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py -v - - # Run a specific test class - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py::TestRetrievalService -v - - # Run a specific test - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval.py::\ -TestRetrievalService::test_vector_search_basic -v - -Notes: -====== -- The RetrievalService uses ThreadPoolExecutor for concurrent search operations -- Tests mock the individual search methods to avoid threading complexity -- All mocked search methods modify the all_documents list in-place -- Score thresholds and top-k limits are enforced by the search methods -""" - +import threading +from contextlib import contextmanager, nullcontext +from types import SimpleNamespace from unittest.mock import MagicMock, Mock, patch from uuid import uuid4 import pytest +from flask import Flask, current_app +from sqlalchemy import column +from core.app.app_config.entities import ( + Condition as AppCondition, +) +from core.app.app_config.entities import ( + DatasetEntity, + DatasetRetrieveConfigEntity, +) +from core.app.app_config.entities import ( + MetadataFilteringCondition as AppMetadataFilteringCondition, +) +from core.app.app_config.entities import ( + ModelConfig as AppModelConfig, +) +from core.app.app_config.entities import ModelConfig as WorkflowModelConfig +from core.app.entities.app_invoke_entities import InvokeFrom, ModelConfigWithCredentialsEntity +from core.entities.agent_entities import PlanningStrategy +from core.entities.model_entities import ModelStatus +from core.rag.data_post_processor.data_post_processor import WeightsDict from core.rag.datasource.retrieval_service import RetrievalService +from core.rag.index_processor.constant.doc_type import DocType +from core.rag.index_processor.constant.index_type import IndexStructureType from core.rag.models.document import Document +from core.rag.rerank.rerank_type import RerankMode from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.rag.retrieval.retrieval_methods import RetrievalMethod +from core.workflow.nodes.knowledge_retrieval import exc +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.model_runtime.entities.model_entities import ModelFeature from models.dataset import Dataset # ==================== Helper Functions ==================== @@ -2013,3 +1975,3094 @@ class TestDocumentModel: assert doc1 == doc2 assert doc1 != doc3 + + +# ==================== Helper Functions ==================== + + +def create_mock_dataset_methods( + dataset_id: str | None = None, + tenant_id: str | None = None, + provider: str = "dify", + indexing_technique: str = "high_quality", + available_document_count: int = 10, +) -> Mock: + """ + Create a mock Dataset object for testing. + + Args: + dataset_id: Unique identifier for the dataset + tenant_id: Tenant ID for the dataset + provider: Provider type ("dify" or "external") + indexing_technique: Indexing technique ("high_quality" or "economy") + available_document_count: Number of available documents + + Returns: + Mock: A properly configured Dataset mock + """ + dataset = Mock(spec=Dataset) + dataset.id = dataset_id or str(uuid4()) + dataset.tenant_id = tenant_id or str(uuid4()) + dataset.name = "test_dataset" + dataset.provider = provider + dataset.indexing_technique = indexing_technique + dataset.available_document_count = available_document_count + dataset.embedding_model = "text-embedding-ada-002" + dataset.embedding_model_provider = "openai" + dataset.retrieval_model = { + "search_method": "semantic_search", + "reranking_enable": False, + "top_k": 4, + "score_threshold_enabled": False, + } + return dataset + + +def create_mock_document_methods( + content: str, + doc_id: str, + score: float = 0.8, + provider: str = "dify", + additional_metadata: dict | None = None, +) -> Document: + """ + Create a mock Document object for testing. + + Args: + content: The text content of the document + doc_id: Unique identifier for the document chunk + score: Relevance score (0.0 to 1.0) + provider: Document provider ("dify" or "external") + additional_metadata: Optional extra metadata fields + + Returns: + Document: A properly structured Document object + """ + metadata = { + "doc_id": doc_id, + "document_id": str(uuid4()), + "dataset_id": str(uuid4()), + "score": score, + } + + if additional_metadata: + metadata.update(additional_metadata) + + return Document( + page_content=content, + metadata=metadata, + provider=provider, + ) + + +# ==================== Test _check_knowledge_rate_limit ==================== + + +class TestCheckKnowledgeRateLimit: + """ + Test suite for _check_knowledge_rate_limit method. + + The _check_knowledge_rate_limit method validates whether a tenant has + exceeded their knowledge retrieval rate limit. This is important for: + - Preventing abuse of the knowledge retrieval system + - Enforcing subscription plan limits + - Tracking usage for billing purposes + + Test Cases: + ============ + 1. Rate limit disabled - no exception raised + 2. Rate limit enabled but not exceeded - no exception raised + 3. Rate limit enabled and exceeded - RateLimitExceededError raised + 4. Redis operations are performed correctly + 5. RateLimitLog is created when limit is exceeded + """ + + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + def test_rate_limit_disabled_no_exception(self, mock_redis, mock_feature_service): + """ + Test that when rate limit is disabled, no exception is raised. + + This test verifies the behavior when the tenant's subscription + does not have rate limiting enabled. + + Verifies: + - FeatureService.get_knowledge_rate_limit is called + - No Redis operations are performed + - No exception is raised + - Retrieval proceeds normally + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit disabled + mock_limit = Mock() + mock_limit.enabled = False + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Act & Assert - should not raise any exception + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify FeatureService was called + mock_feature_service.get_knowledge_rate_limit.assert_called_once_with(tenant_id) + + # Verify no Redis operations were performed + assert not mock_redis.zadd.called + assert not mock_redis.zremrangebyscore.called + assert not mock_redis.zcard.called + + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + @patch("core.rag.retrieval.dataset_retrieval.time") + def test_rate_limit_enabled_not_exceeded(self, mock_time, mock_redis, mock_feature_service, mock_session_factory): + """ + Test that when rate limit is enabled but not exceeded, no exception is raised. + + This test simulates a tenant making requests within their rate limit. + The Redis sorted set stores timestamps of recent requests, and old + requests (older than 60 seconds) are removed. + + Verifies: + - Redis zadd is called to track the request + - Redis zremrangebyscore removes old entries + - Redis zcard returns count within limit + - No exception is raised + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit enabled with limit of 100 requests per minute + mock_limit = Mock() + mock_limit.enabled = True + mock_limit.limit = 100 + mock_limit.subscription_plan = "professional" + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Mock time + current_time = 1234567890000 # Current time in milliseconds + mock_time.time.return_value = current_time / 1000 # Return seconds + mock_time.time.__mul__ = lambda self, x: int(self * x) # Multiply to get milliseconds + + # Mock Redis operations + # zcard returns 50 (within limit of 100) + mock_redis.zcard.return_value = 50 + + # Mock session_factory.create_session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + # Act & Assert - should not raise any exception + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify Redis operations + expected_key = f"rate_limit_{tenant_id}" + mock_redis.zadd.assert_called_once_with(expected_key, {current_time: current_time}) + mock_redis.zremrangebyscore.assert_called_once_with(expected_key, 0, current_time - 60000) + mock_redis.zcard.assert_called_once_with(expected_key) + + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + @patch("core.rag.retrieval.dataset_retrieval.FeatureService") + @patch("core.rag.retrieval.dataset_retrieval.redis_client") + @patch("core.rag.retrieval.dataset_retrieval.time") + def test_rate_limit_enabled_exceeded_raises_exception( + self, mock_time, mock_redis, mock_feature_service, mock_session_factory + ): + """ + Test that when rate limit is enabled and exceeded, RateLimitExceededError is raised. + + This test simulates a tenant exceeding their rate limit. When the count + of recent requests exceeds the limit, an exception should be raised and + a RateLimitLog should be created. + + Verifies: + - Redis zcard returns count exceeding limit + - RateLimitExceededError is raised with correct message + - RateLimitLog is created in database + - Session operations are performed correctly + """ + # Arrange + tenant_id = str(uuid4()) + dataset_retrieval = DatasetRetrieval() + + # Mock rate limit enabled with limit of 100 requests per minute + mock_limit = Mock() + mock_limit.enabled = True + mock_limit.limit = 100 + mock_limit.subscription_plan = "professional" + mock_feature_service.get_knowledge_rate_limit.return_value = mock_limit + + # Mock time + current_time = 1234567890000 + mock_time.time.return_value = current_time / 1000 + + # Mock Redis operations - return count exceeding limit + mock_redis.zcard.return_value = 150 # Exceeds limit of 100 + + # Mock session_factory.create_session + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + # Act & Assert + with pytest.raises(exc.RateLimitExceededError) as exc_info: + dataset_retrieval._check_knowledge_rate_limit(tenant_id) + + # Verify exception message + assert "knowledge base request rate limit" in str(exc_info.value) + + # Verify RateLimitLog was created + mock_session.add.assert_called_once() + added_log = mock_session.add.call_args[0][0] + assert added_log.tenant_id == tenant_id + assert added_log.subscription_plan == "professional" + assert added_log.operation == "knowledge" + + +# ==================== Test _get_available_datasets ==================== + + +class TestGetAvailableDatasets: + """ + Test suite for _get_available_datasets method. + + The _get_available_datasets method retrieves datasets that are available + for retrieval. A dataset is considered available if: + - It belongs to the specified tenant + - It's in the list of requested dataset_ids + - It has at least one completed, enabled, non-archived document OR + - It's an external provider dataset + + Note: Due to SQLAlchemy subquery complexity, full testing is done in + integration tests. Unit tests here verify basic behavior. + """ + + def test_method_exists_and_has_correct_signature(self): + """ + Test that the method exists and has the correct signature. + + Verifies: + - Method exists on DatasetRetrieval class + - Accepts tenant_id and dataset_ids parameters + """ + # Arrange + dataset_retrieval = DatasetRetrieval() + + # Assert - method exists + assert hasattr(dataset_retrieval, "_get_available_datasets") + # Assert - method is callable + assert callable(dataset_retrieval._get_available_datasets) + + +# ==================== Test knowledge_retrieval ==================== + + +class TestDatasetRetrievalKnowledgeRetrieval: + """ + Test suite for knowledge_retrieval method. + + The knowledge_retrieval method is the main entry point for retrieving + knowledge from datasets. It orchestrates the entire retrieval process: + 1. Checks rate limits + 2. Gets available datasets + 3. Applies metadata filtering if enabled + 4. Performs retrieval (single or multiple mode) + 5. Formats and returns results + + Test Cases: + ============ + 1. Single mode retrieval + 2. Multiple mode retrieval + 3. Metadata filtering disabled + 4. Metadata filtering automatic + 5. Metadata filtering manual + 6. External documents handling + 7. Dify documents handling + 8. Empty results handling + 9. Rate limit exceeded + 10. No available datasets + """ + + def test_knowledge_retrieval_single_mode_basic(self): + """ + Test knowledge_retrieval in single retrieval mode - basic check. + + Note: Full single mode testing requires complex model mocking and + is better suited for integration tests. This test verifies the + method accepts single mode requests. + + Verifies: + - Method can accept single mode request + - Request parameters are correctly structured + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="single", + model_provider="openai", + model_name="gpt-4", + model_mode="chat", + completion_params={"temperature": 0.7}, + ) + + # Assert - request is properly structured + assert request.retrieval_mode == "single" + assert request.model_provider == "openai" + assert request.model_name == "gpt-4" + assert request.model_mode == "chat" + + @patch("core.rag.retrieval.dataset_retrieval.DataPostProcessor") + @patch("core.rag.retrieval.dataset_retrieval.session_factory") + def test_knowledge_retrieval_multiple_mode(self, mock_session_factory, mock_data_processor): + """ + Test knowledge_retrieval in multiple retrieval mode. + + In multiple mode, retrieval is performed across all datasets and + results are combined and reranked. + + Verifies: + - Rate limit is checked + - Available datasets are retrieved + - Multiple retrieval is performed + - Results are combined and reranked + - Results are formatted correctly + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id1 = str(uuid4()) + dataset_id2 = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id1, dataset_id2], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + score_threshold=0.7, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v2"}, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock _check_knowledge_rate_limit + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + # Mock _get_available_datasets + mock_dataset1 = create_mock_dataset_methods(dataset_id=dataset_id1, tenant_id=tenant_id) + mock_dataset2 = create_mock_dataset_methods(dataset_id=dataset_id2, tenant_id=tenant_id) + with patch.object( + dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset1, mock_dataset2] + ): + # Mock get_metadata_filter_condition + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Mock multiple_retrieve to return documents + doc1 = create_mock_document_methods("Python is great", "doc1", score=0.9) + doc2 = create_mock_document_methods("Python is awesome", "doc2", score=0.8) + with patch.object( + dataset_retrieval, "multiple_retrieve", return_value=[doc1, doc2] + ) as mock_multiple_retrieve: + # Mock format_retrieval_documents + mock_record = Mock() + mock_record.segment = Mock() + mock_record.segment.dataset_id = dataset_id1 + mock_record.segment.document_id = str(uuid4()) + mock_record.segment.index_node_hash = "hash123" + mock_record.segment.hit_count = 5 + mock_record.segment.word_count = 100 + mock_record.segment.position = 1 + mock_record.segment.get_sign_content.return_value = "Python is great" + mock_record.segment.answer = None + mock_record.score = 0.9 + mock_record.child_chunks = [] + mock_record.summary = None + mock_record.files = None + + mock_retrieval_service = Mock() + mock_retrieval_service.format_retrieval_documents.return_value = [mock_record] + + with patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService", + return_value=mock_retrieval_service, + ): + # Mock database queries + mock_session = MagicMock() + mock_session_factory.create_session.return_value.__enter__.return_value = mock_session + mock_session_factory.create_session.return_value.__exit__.return_value = None + + mock_dataset_from_db = Mock() + mock_dataset_from_db.id = dataset_id1 + mock_dataset_from_db.name = "test_dataset" + + mock_document = Mock() + mock_document.id = str(uuid4()) + mock_document.name = "test_doc" + mock_document.data_source_type = "upload_file" + mock_document.doc_metadata = {} + + mock_session.query.return_value.filter.return_value.all.return_value = [ + mock_dataset_from_db + ] + mock_session.query.return_value.filter.return_value.all.__iter__ = lambda self: iter( + [mock_dataset_from_db, mock_document] + ) + + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + mock_multiple_retrieve.assert_called_once() + + def test_knowledge_retrieval_metadata_filtering_disabled(self): + """ + Test knowledge_retrieval with metadata filtering disabled. + + When metadata filtering is disabled, get_metadata_filter_condition is + NOT called (the method checks metadata_filtering_mode != "disabled"). + + Verifies: + - get_metadata_filter_condition is NOT called when mode is "disabled" + - Retrieval proceeds without metadata filters + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + metadata_filtering_mode="disabled", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id) + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + # Mock get_metadata_filter_condition - should NOT be called when disabled + with patch.object( + dataset_retrieval, + "get_metadata_filter_condition", + return_value=(None, None), + ) as mock_get_metadata: + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + # get_metadata_filter_condition should NOT be called when mode is "disabled" + mock_get_metadata.assert_not_called() + + def test_knowledge_retrieval_with_external_documents(self): + """ + Test knowledge_retrieval with external documents. + + External documents come from external knowledge bases and should + be formatted differently than Dify documents. + + Verifies: + - External documents are handled correctly + - Provider is set to "external" + - Metadata includes external-specific fields + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id, provider="external") + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Create external document + external_doc = create_mock_document_methods( + "External knowledge", + "doc1", + score=0.9, + provider="external", + additional_metadata={ + "dataset_id": dataset_id, + "dataset_name": "external_kb", + "document_id": "ext_doc1", + "title": "External Document", + }, + ) + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[external_doc]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert isinstance(result, list) + if result: + assert result[0].metadata.data_source_type == "external" + + def test_knowledge_retrieval_empty_results(self): + """ + Test knowledge_retrieval when no documents are found. + + Verifies: + - Empty list is returned + - No errors are raised + - All dependencies are still called + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + mock_dataset = create_mock_dataset_methods(dataset_id=dataset_id, tenant_id=tenant_id) + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[mock_dataset]): + with patch.object(dataset_retrieval, "get_metadata_filter_condition", return_value=(None, None)): + # Mock multiple_retrieve to return empty list + with patch.object(dataset_retrieval, "multiple_retrieve", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert result == [] + + def test_knowledge_retrieval_rate_limit_exceeded(self): + """ + Test knowledge_retrieval when rate limit is exceeded. + + Verifies: + - RateLimitExceededError is raised + - No further processing occurs + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock _check_knowledge_rate_limit to raise exception + with patch.object( + dataset_retrieval, + "_check_knowledge_rate_limit", + side_effect=exc.RateLimitExceededError("Rate limit exceeded"), + ): + # Act & Assert + with pytest.raises(exc.RateLimitExceededError): + dataset_retrieval.knowledge_retrieval(request) + + def test_knowledge_retrieval_no_available_datasets(self): + """ + Test knowledge_retrieval when no datasets are available. + + Verifies: + - Empty list is returned + - No retrieval is attempted + """ + # Arrange + tenant_id = str(uuid4()) + user_id = str(uuid4()) + app_id = str(uuid4()) + dataset_id = str(uuid4()) + + request = KnowledgeRetrievalRequest( + tenant_id=tenant_id, + user_id=user_id, + app_id=app_id, + user_from="web", + dataset_ids=[dataset_id], + query="What is Python?", + retrieval_mode="multiple", + top_k=5, + ) + + dataset_retrieval = DatasetRetrieval() + + # Mock dependencies + with patch.object(dataset_retrieval, "_check_knowledge_rate_limit"): + # Mock _get_available_datasets to return empty list + with patch.object(dataset_retrieval, "_get_available_datasets", return_value=[]): + # Act + result = dataset_retrieval.knowledge_retrieval(request) + + # Assert + assert result == [] + + def test_knowledge_retrieval_handles_multiple_documents_with_different_scores(self): + """ + Test that knowledge_retrieval processes multiple documents with different scores. + + Note: Full sorting and position testing requires complex SQLAlchemy mocking + which is better suited for integration tests. This test verifies documents + with different scores can be created and have their metadata. + + Verifies: + - Documents can be created with different scores + - Score metadata is properly set + """ + # Create documents with different scores + doc1 = create_mock_document_methods("Low score", "doc1", score=0.6) + doc2 = create_mock_document_methods("High score", "doc2", score=0.95) + doc3 = create_mock_document_methods("Medium score", "doc3", score=0.8) + + # Assert - each document has the correct score + assert doc1.metadata["score"] == 0.6 + assert doc2.metadata["score"] == 0.95 + assert doc3.metadata["score"] == 0.8 + + # Assert - documents are correctly sorted (not the retrieval result, just the list) + unsorted = [doc1, doc2, doc3] + sorted_docs = sorted(unsorted, key=lambda d: d.metadata["score"], reverse=True) + assert [d.metadata["score"] for d in sorted_docs] == [0.95, 0.8, 0.6] + + +class TestProcessMetadataFilterFunc: + """ + Comprehensive test suite for process_metadata_filter_func method. + + This test class validates all metadata filtering conditions supported by + the DatasetRetrieval class, including string operations, numeric comparisons, + null checks, and list operations. + + Method Signature: + ================== + def process_metadata_filter_func( + self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list + ) -> list: + + The method builds SQLAlchemy filter expressions by: + 1. Validating value is not None (except for empty/not empty conditions) + 2. Using DatasetDocument.doc_metadata JSON field operations + 3. Adding appropriate SQLAlchemy expressions to the filters list + 4. Returning the updated filters list + + Mocking Strategy: + ================== + - Mock DatasetDocument.doc_metadata to avoid database dependencies + - Verify filter expressions are created correctly + - Test with various data types (str, int, float, list) + """ + + @pytest.fixture + def retrieval(self): + """ + Create a DatasetRetrieval instance for testing. + + Returns: + DatasetRetrieval: Instance to test process_metadata_filter_func + """ + return DatasetRetrieval() + + @pytest.fixture + def mock_doc_metadata(self): + """ + Mock the DatasetDocument.doc_metadata JSON field. + + The method uses DatasetDocument.doc_metadata[metadata_name] to access + JSON fields. We mock this to avoid database dependencies. + + Returns: + Mock: Mocked doc_metadata attribute + """ + mock_metadata_field = MagicMock() + + # Create mock for string access + mock_string_access = MagicMock() + mock_string_access.like = MagicMock() + mock_string_access.notlike = MagicMock() + mock_string_access.__eq__ = MagicMock(return_value=MagicMock()) + mock_string_access.__ne__ = MagicMock(return_value=MagicMock()) + mock_string_access.in_ = MagicMock(return_value=MagicMock()) + + # Create mock for float access (for numeric comparisons) + mock_float_access = MagicMock() + mock_float_access.__eq__ = MagicMock(return_value=MagicMock()) + mock_float_access.__ne__ = MagicMock(return_value=MagicMock()) + mock_float_access.__lt__ = MagicMock(return_value=MagicMock()) + mock_float_access.__gt__ = MagicMock(return_value=MagicMock()) + mock_float_access.__le__ = MagicMock(return_value=MagicMock()) + mock_float_access.__ge__ = MagicMock(return_value=MagicMock()) + + # Create mock for null checks + mock_null_access = MagicMock() + mock_null_access.is_ = MagicMock(return_value=MagicMock()) + mock_null_access.isnot = MagicMock(return_value=MagicMock()) + + # Setup __getitem__ to return appropriate mock based on usage + def getitem_side_effect(name): + if name in ["author", "title", "category"]: + return mock_string_access + elif name in ["year", "price", "rating"]: + return mock_float_access + else: + return mock_string_access + + mock_metadata_field.__getitem__ = MagicMock(side_effect=getitem_side_effect) + mock_metadata_field.as_string.return_value = mock_string_access + mock_metadata_field.as_float.return_value = mock_float_access + mock_metadata_field[metadata_name:str].is_ = mock_null_access.is_ + mock_metadata_field[metadata_name:str].isnot = mock_null_access.isnot + + return mock_metadata_field + + # ==================== String Condition Tests ==================== + + def test_contains_condition_string_value(self, retrieval): + """ + Test 'contains' condition with string value. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses %value% syntax + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "John" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_contains_condition(self, retrieval): + """ + Test 'not contains' condition. + + Verifies: + - Filters list is populated with NOT LIKE expression + - Pattern matching uses %value% syntax with negation + """ + filters = [] + sequence = 0 + condition = "not contains" + metadata_name = "title" + value = "banned" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_start_with_condition(self, retrieval): + """ + Test 'start with' condition. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses value% syntax + """ + filters = [] + sequence = 0 + condition = "start with" + metadata_name = "category" + value = "tech" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_end_with_condition(self, retrieval): + """ + Test 'end with' condition. + + Verifies: + - Filters list is populated with LIKE expression + - Pattern matching uses %value syntax + """ + filters = [] + sequence = 0 + condition = "end with" + metadata_name = "filename" + value = ".pdf" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Equality Condition Tests ==================== + + def test_is_condition_with_string_value(self, retrieval): + """ + Test 'is' (=) condition with string value. + + Verifies: + - Filters list is populated with equality expression + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "author" + value = "Jane Doe" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_equals_condition_with_string_value(self, retrieval): + """ + Test '=' condition with string value. + + Verifies: + - Same behavior as 'is' condition + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "=" + metadata_name = "category" + value = "technology" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_condition_with_int_value(self, retrieval): + """ + Test 'is' condition with integer value. + + Verifies: + - Numeric comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "year" + value = 2023 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_condition_with_float_value(self, retrieval): + """ + Test 'is' condition with float value. + + Verifies: + - Numeric comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "price" + value = 19.99 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_not_condition_with_string_value(self, retrieval): + """ + Test 'is not' (≠) condition with string value. + + Verifies: + - Filters list is populated with inequality expression + - String comparison is used + """ + filters = [] + sequence = 0 + condition = "is not" + metadata_name = "author" + value = "Unknown" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_equals_condition(self, retrieval): + """ + Test '≠' condition with string value. + + Verifies: + - Same behavior as 'is not' condition + - Inequality expression is used + """ + filters = [] + sequence = 0 + condition = "≠" + metadata_name = "category" + value = "archived" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_is_not_condition_with_numeric_value(self, retrieval): + """ + Test 'is not' condition with numeric value. + + Verifies: + - Numeric inequality comparison is used + - as_float() is called on the metadata field + """ + filters = [] + sequence = 0 + condition = "is not" + metadata_name = "year" + value = 2000 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Null Condition Tests ==================== + + def test_empty_condition(self, retrieval): + """ + Test 'empty' condition (null check). + + Verifies: + - Filters list is populated with IS NULL expression + - Value can be None for this condition + """ + filters = [] + sequence = 0 + condition = "empty" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_not_empty_condition(self, retrieval): + """ + Test 'not empty' condition (not null check). + + Verifies: + - Filters list is populated with IS NOT NULL expression + - Value can be None for this condition + """ + filters = [] + sequence = 0 + condition = "not empty" + metadata_name = "description" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Numeric Comparison Tests ==================== + + def test_before_condition(self, retrieval): + """ + Test 'before' (<) condition. + + Verifies: + - Filters list is populated with less than expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "before" + metadata_name = "year" + value = 2020 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_condition(self, retrieval): + """ + Test '<' condition. + + Verifies: + - Same behavior as 'before' condition + - Less than expression is used + """ + filters = [] + sequence = 0 + condition = "<" + metadata_name = "price" + value = 100.0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_after_condition(self, retrieval): + """ + Test 'after' (>) condition. + + Verifies: + - Filters list is populated with greater than expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "after" + metadata_name = "year" + value = 2020 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_condition(self, retrieval): + """ + Test '>' condition. + + Verifies: + - Same behavior as 'after' condition + - Greater than expression is used + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "rating" + value = 4.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_or_equal_condition_unicode(self, retrieval): + """ + Test '≤' condition. + + Verifies: + - Filters list is populated with less than or equal expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "≤" + metadata_name = "price" + value = 50.0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_less_than_or_equal_condition_ascii(self, retrieval): + """ + Test '<=' condition. + + Verifies: + - Same behavior as '≤' condition + - Less than or equal expression is used + """ + filters = [] + sequence = 0 + condition = "<=" + metadata_name = "year" + value = 2023 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_or_equal_condition_unicode(self, retrieval): + """ + Test '≥' condition. + + Verifies: + - Filters list is populated with greater than or equal expression + - Numeric comparison is used + """ + filters = [] + sequence = 0 + condition = "≥" + metadata_name = "rating" + value = 3.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_greater_than_or_equal_condition_ascii(self, retrieval): + """ + Test '>=' condition. + + Verifies: + - Same behavior as '≥' condition + - Greater than or equal expression is used + """ + filters = [] + sequence = 0 + condition = ">=" + metadata_name = "year" + value = 2000 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== List/In Condition Tests ==================== + + def test_in_condition_with_comma_separated_string(self, retrieval): + """ + Test 'in' condition with comma-separated string value. + + Verifies: + - String is split into list + - Whitespace is trimmed from each value + - IN expression is created + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "tech, science, AI " + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_list_value(self, retrieval): + """ + Test 'in' condition with list value. + + Verifies: + - List is processed correctly + - None values are filtered out + - IN expression is created with valid values + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "tags" + value = ["python", "javascript", None, "golang"] + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_tuple_value(self, retrieval): + """ + Test 'in' condition with tuple value. + + Verifies: + - Tuple is processed like a list + - IN expression is created + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = ("tech", "science", "ai") + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_empty_string(self, retrieval): + """ + Test 'in' condition with empty string value. + + Verifies: + - Empty string results in literal(False) filter + - No valid values to match + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + # Verify it's a literal(False) expression + # This is a bit tricky to test without access to the actual expression + + def test_in_condition_with_only_whitespace(self, retrieval): + """ + Test 'in' condition with whitespace-only string value. + + Verifies: + - Whitespace-only string results in literal(False) filter + - All values are stripped and filtered out + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = " , , " + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_in_condition_with_single_string(self, retrieval): + """ + Test 'in' condition with single non-comma string. + + Verifies: + - Single string is treated as single-item list + - IN expression is created with one value + """ + filters = [] + sequence = 0 + condition = "in" + metadata_name = "category" + value = "technology" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + # ==================== Edge Case Tests ==================== + + def test_none_value_with_non_empty_condition(self, retrieval): + """ + Test None value with conditions that require value. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values (except empty/not empty) + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 # No filter added + + def test_none_value_with_equals_condition(self, retrieval): + """ + Test None value with 'is' (=) condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values + """ + filters = [] + sequence = 0 + condition = "is" + metadata_name = "author" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_none_value_with_numeric_condition(self, retrieval): + """ + Test None value with numeric comparison condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for None values + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "year" + value = None + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_existing_filters_preserved(self, retrieval): + """ + Test that existing filters are preserved. + + Verifies: + - Existing filters in the list are not removed + - New filters are appended to the list + """ + existing_filter = MagicMock() + filters = [existing_filter] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "test" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 2 + assert filters[0] == existing_filter + + def test_multiple_filters_accumulated(self, retrieval): + """ + Test multiple calls to accumulate filters. + + Verifies: + - Each call adds a new filter to the list + - All filters are preserved across calls + """ + filters = [] + + # First filter + retrieval.process_metadata_filter_func(0, "contains", "author", "John", filters) + assert len(filters) == 1 + + # Second filter + retrieval.process_metadata_filter_func(1, ">", "year", 2020, filters) + assert len(filters) == 2 + + # Third filter + retrieval.process_metadata_filter_func(2, "is", "category", "tech", filters) + assert len(filters) == 3 + + def test_unknown_condition(self, retrieval): + """ + Test unknown/unsupported condition. + + Verifies: + - Original filters list is returned unchanged + - No filter is added for unknown conditions + """ + filters = [] + sequence = 0 + condition = "unknown_condition" + metadata_name = "author" + value = "test" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 0 + + def test_empty_string_value_with_contains(self, retrieval): + """ + Test empty string value with 'contains' condition. + + Verifies: + - Filter is added even with empty string + - LIKE expression is created + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "author" + value = "" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_special_characters_in_value(self, retrieval): + """ + Test special characters in value string. + + Verifies: + - Special characters are handled in value + - LIKE expression is created correctly + """ + filters = [] + sequence = 0 + condition = "contains" + metadata_name = "title" + value = "C++ & Python's features" + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_zero_value_with_numeric_condition(self, retrieval): + """ + Test zero value with numeric comparison condition. + + Verifies: + - Zero is treated as valid value + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = ">" + metadata_name = "price" + value = 0 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_negative_value_with_numeric_condition(self, retrieval): + """ + Test negative value with numeric comparison condition. + + Verifies: + - Negative numbers are handled correctly + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = "<" + metadata_name = "temperature" + value = -10.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + def test_float_value_with_integer_comparison(self, retrieval): + """ + Test float value with numeric comparison condition. + + Verifies: + - Float values work correctly + - Numeric comparison is performed + """ + filters = [] + sequence = 0 + condition = ">=" + metadata_name = "rating" + value = 4.5 + + result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) + + assert result == filters + assert len(filters) == 1 + + +class TestKnowledgeRetrievalRegression: + @pytest.fixture + def mock_dataset(self) -> Dataset: + dataset = Mock(spec=Dataset) + dataset.id = str(uuid4()) + dataset.tenant_id = str(uuid4()) + dataset.name = "test_dataset" + dataset.indexing_technique = "high_quality" + dataset.provider = "dify" + return dataset + + def test_multiple_retrieve_reranking_with_app_context(self, mock_dataset): + """ + Repro test for current bug: + reranking runs after `with flask_app.app_context():` exits. + `_multiple_retrieve_thread` catches exceptions and stores them into `thread_exceptions`, + so we must assert from that list (not from an outer try/except). + """ + dataset_retrieval = DatasetRetrieval() + flask_app = Flask(__name__) + tenant_id = str(uuid4()) + + # second dataset to ensure dataset_count > 1 reranking branch + secondary_dataset = Mock(spec=Dataset) + secondary_dataset.id = str(uuid4()) + secondary_dataset.provider = "dify" + secondary_dataset.indexing_technique = "high_quality" + + # retriever returns 1 doc into internal list (all_documents_item) + document = Document( + page_content="Context aware doc", + metadata={ + "doc_id": "doc1", + "score": 0.95, + "document_id": str(uuid4()), + "dataset_id": mock_dataset.id, + }, + provider="dify", + ) + + def fake_retriever( + flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids + ): + all_documents.append(document) + + called = {"init": 0, "invoke": 0} + + class ContextRequiredPostProcessor: + def __init__(self, *args, **kwargs): + called["init"] += 1 + # will raise RuntimeError if no Flask app context exists + _ = current_app.name + + def invoke(self, *args, **kwargs): + called["invoke"] += 1 + _ = current_app.name + return kwargs.get("documents") or args[1] + + # output list from _multiple_retrieve_thread + all_documents: list[Document] = [] + + # IMPORTANT: _multiple_retrieve_thread swallows exceptions and appends them here + thread_exceptions: list[Exception] = [] + + def target(): + with patch.object(dataset_retrieval, "_retriever", side_effect=fake_retriever): + with patch( + "core.rag.retrieval.dataset_retrieval.DataPostProcessor", + ContextRequiredPostProcessor, + ): + dataset_retrieval._multiple_retrieve_thread( + flask_app=flask_app, + available_datasets=[mock_dataset, secondary_dataset], + metadata_condition=None, + metadata_filter_document_ids=None, + all_documents=all_documents, + tenant_id=tenant_id, + reranking_enable=True, + reranking_mode="reranking_model", + reranking_model={ + "reranking_provider_name": "cohere", + "reranking_model_name": "rerank-v2", + }, + weights=None, + top_k=3, + score_threshold=0.0, + query="test query", + attachment_id=None, + dataset_count=2, # force reranking branch + thread_exceptions=thread_exceptions, # ✅ key + ) + + t = threading.Thread(target=target) + t.start() + t.join() + + # Ensure reranking branch was actually executed + assert called["init"] >= 1, "DataPostProcessor was never constructed; reranking branch may not have run." + + # Current buggy code should record an exception (not raise it) + assert not thread_exceptions, thread_exceptions + + +class _FakeFlaskApp: + def app_context(self): + return nullcontext() + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._alive = False + + def start(self) -> None: + self._alive = True + if self._target: + self._target(**self._kwargs) + self._alive = False + + def join(self, timeout=None) -> None: + return None + + def is_alive(self) -> bool: + return self._alive + + +class TestDatasetRetrievalAdditionalHelpers: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_llm_usage_and_record_usage(self, retrieval: DatasetRetrieval) -> None: + empty_usage = retrieval.llm_usage + assert empty_usage.total_tokens == 0 + + retrieval._record_usage(None) + assert retrieval.llm_usage.total_tokens == 0 + + usage_1 = LLMUsage.from_metadata({"prompt_tokens": 2, "completion_tokens": 3, "total_tokens": 5}) + usage_2 = LLMUsage.from_metadata({"prompt_tokens": 4, "completion_tokens": 1, "total_tokens": 5}) + retrieval._record_usage(usage_1) + retrieval._record_usage(usage_2) + assert retrieval.llm_usage.total_tokens == 10 + + def test_replace_metadata_filter_value(self, retrieval: DatasetRetrieval) -> None: + assert retrieval._replace_metadata_filter_value("plain", {}) == "plain" + replaced = retrieval._replace_metadata_filter_value( + "hello {{name}}\n\t{{missing}}", + {"name": "world"}, + ) + assert replaced == "hello world {{missing}}" + + def test_process_metadata_filter_in_with_scalar_fallback(self) -> None: + filters: list = [] + result = DatasetRetrieval.process_metadata_filter_func( + sequence=0, + condition="in", + metadata_name="category", + value=123, + filters=filters, + ) + assert result is filters + assert len(filters) == 1 + + def test_calculate_vector_score(self, retrieval: DatasetRetrieval) -> None: + doc_high = Document(page_content="a", metadata={"score": 0.9}, provider="dify") + doc_low = Document(page_content="b", metadata={"score": 0.2}, provider="dify") + doc_no_meta = Document(page_content="c", metadata={}, provider="dify") + + filtered = retrieval.calculate_vector_score([doc_low, doc_high, doc_no_meta], top_k=1, score_threshold=0.5) + assert len(filtered) == 1 + assert filtered[0].metadata["score"] == 0.9 + + assert retrieval.calculate_vector_score([doc_low], top_k=2, score_threshold=1.0) == [] + + def test_calculate_keyword_score(self, retrieval: DatasetRetrieval) -> None: + documents = [ + Document(page_content="python language", metadata={"doc_id": "1"}, provider="dify"), + Document(page_content="java language", metadata={"doc_id": "2"}, provider="dify"), + ] + keyword_handler = Mock() + keyword_handler.extract_keywords.side_effect = [ + ["python", "language"], + ["python", "language"], + ["java", "language"], + ] + + with patch("core.rag.retrieval.dataset_retrieval.JiebaKeywordTableHandler", return_value=keyword_handler): + ranked = retrieval.calculate_keyword_score("python language", documents, top_k=1) + + assert len(ranked) == 1 + assert "keywords" in ranked[0].metadata + assert ranked[0].metadata["doc_id"] == "1" + + def test_send_trace_task(self, retrieval: DatasetRetrieval) -> None: + trace_manager = Mock() + retrieval.application_generate_entity = SimpleNamespace(trace_manager=trace_manager) + docs = [Document(page_content="d", metadata={}, provider="dify")] + + retrieval._send_trace_task("m1", docs, {"cost": 1}) + trace_manager.add_trace_task.assert_called_once() + + retrieval.application_generate_entity = None + trace_manager.reset_mock() + retrieval._send_trace_task("m1", docs, {"cost": 1}) + trace_manager.add_trace_task.assert_not_called() + + def test_on_query(self, retrieval: DatasetRetrieval) -> None: + with patch("core.rag.retrieval.dataset_retrieval.db.session") as mock_session: + retrieval._on_query( + query=None, + attachment_ids=None, + dataset_ids=["d1"], + app_id="a1", + user_from="account", + user_id="u1", + ) + mock_session.add_all.assert_not_called() + + retrieval._on_query( + query="python", + attachment_ids=["f1"], + dataset_ids=["d1", "d2"], + app_id="a1", + user_from="account", + user_id="u1", + ) + mock_session.add_all.assert_called() + mock_session.commit.assert_called() + + def test_handle_invoke_result(self, retrieval: DatasetRetrieval) -> None: + usage = LLMUsage.empty_usage() + chunk_1 = SimpleNamespace( + model="m1", + prompt_messages=[Mock()], + delta=SimpleNamespace(message=SimpleNamespace(content="hello "), usage=usage), + ) + chunk_2 = SimpleNamespace( + model="m1", + prompt_messages=[Mock()], + delta=SimpleNamespace( + message=SimpleNamespace(content=[SimpleNamespace(data="world")]), + usage=None, + ), + ) + text, returned_usage = retrieval._handle_invoke_result(iter([chunk_1, chunk_2])) + assert text == "hello world" + assert returned_usage == usage + + text_empty, usage_empty = retrieval._handle_invoke_result(iter([])) + assert text_empty == "" + assert usage_empty == LLMUsage.empty_usage() + + def test_get_prompt_template(self, retrieval: DatasetRetrieval) -> None: + model_config_chat = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=["x"], + ) + model_config_completion = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="completion", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ) + + with patch("core.rag.retrieval.dataset_retrieval.AdvancedPromptTransform") as mock_prompt_transform: + mock_prompt_transform.return_value.get_prompt.return_value = ["prompt"] + prompt_messages, stop = retrieval._get_prompt_template( + model_config=model_config_chat, + mode="chat", + metadata_fields=["author"], + query="python", + ) + assert prompt_messages == ["prompt"] + assert stop == ["x"] + + with patch( + "core.rag.retrieval.dataset_retrieval.METADATA_FILTER_COMPLETION_PROMPT", + "{input_text} {metadata_fields}", + ): + prompt_messages_completion, stop_completion = retrieval._get_prompt_template( + model_config=model_config_completion, + mode="completion", + metadata_fields=["author"], + query="python", + ) + assert prompt_messages_completion == ["prompt"] + assert stop_completion == [] + + with pytest.raises(ValueError): + retrieval._get_prompt_template( + model_config=model_config_chat, + mode="unknown-mode", + metadata_fields=[], + query="python", + ) + + def test_fetch_model_config_validation_and_success(self, retrieval: DatasetRetrieval) -> None: + with pytest.raises(ValueError, match="single_retrieval_config is required"): + retrieval._fetch_model_config("tenant-1", None) # type: ignore[arg-type] + + model_cfg = AppModelConfig(provider="openai", name="gpt", mode="chat", completion_params={"stop": ["END"]}) + model_instance = Mock() + model_instance.credentials = {"k": "v"} + model_instance.provider_model_bundle = Mock() + model_instance.model_type_instance = Mock() + model_instance.model_type_instance.get_model_schema.return_value = Mock() + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_manager, + patch("core.rag.retrieval.dataset_retrieval.ModelConfigWithCredentialsEntity") as mock_cfg_entity, + ): + mock_manager.return_value.get_model_instance.return_value = model_instance + mock_cfg_entity.return_value = SimpleNamespace( + provider="openai", + model="gpt", + stop=["END"], + parameters={"temperature": 0.1}, + ) + + model_instance.provider_model_bundle.configuration.get_provider_model.return_value = None + with pytest.raises(ValueError, match="not exist"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model = SimpleNamespace(status=ModelStatus.NO_CONFIGURE) + model_instance.provider_model_bundle.configuration.get_provider_model.return_value = provider_model + with pytest.raises(ValueError, match="credentials is not initialized"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.NO_PERMISSION + with pytest.raises(ValueError, match="currently not support"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.QUOTA_EXCEEDED + with pytest.raises(ValueError, match="quota exceeded"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + provider_model.status = ModelStatus.ACTIVE + bad_mode_cfg = AppModelConfig(provider="openai", name="gpt", mode="chat") + bad_mode_cfg.mode = None # type: ignore[assignment] + with pytest.raises(ValueError, match="LLM mode is required"): + retrieval._fetch_model_config("tenant-1", bad_mode_cfg) + + model_instance.model_type_instance.get_model_schema.return_value = None + with pytest.raises(ValueError, match="not exist"): + retrieval._fetch_model_config("tenant-1", model_cfg) + + model_instance.model_type_instance.get_model_schema.return_value = Mock() + model_cfg_success = AppModelConfig( + provider="openai", + name="gpt", + mode="chat", + completion_params={"temperature": 0.1, "stop": ["END"]}, + ) + _, config = retrieval._fetch_model_config("tenant-1", model_cfg_success) + assert config.provider == "openai" + assert config.model == "gpt" + assert config.stop == ["END"] + assert "stop" not in config.parameters + + def test_automatic_metadata_filter_func(self, retrieval: DatasetRetrieval) -> None: + metadata_field = SimpleNamespace(name="author") + model_instance = Mock() + model_instance.invoke_llm.return_value = iter([Mock()]) + model_config = ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}) + session_scalars = Mock() + session_scalars.all.return_value = [metadata_field] + + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", return_value=(model_instance, model_config)), + patch.object(retrieval, "_get_prompt_template", return_value=(["prompt"], [])), + patch.object(retrieval, "_handle_invoke_result", return_value=('{"metadata_map":[]}', usage)), + patch("core.rag.retrieval.dataset_retrieval.parse_and_check_json_markdown") as mock_parse, + patch.object(retrieval, "_record_usage") as mock_record_usage, + ): + mock_parse.return_value = { + "metadata_map": [ + { + "metadata_field_name": "author", + "metadata_field_value": "Alice", + "comparison_operator": "contains", + }, + { + "metadata_field_name": "ignored", + "metadata_field_value": "value", + "comparison_operator": "contains", + }, + ] + } + result = retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + ) + + assert result == [{"metadata_name": "author", "value": "Alice", "condition": "contains"}] + mock_record_usage.assert_called_once_with(usage) + + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", side_effect=RuntimeError("boom")), + ): + with pytest.raises(RuntimeError, match="boom"): + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + ) + + def test_get_metadata_filter_condition(self, retrieval: DatasetRetrieval) -> None: + db_query = Mock() + db_query.where.return_value = db_query + db_query.all.return_value = [SimpleNamespace(dataset_id="d1", id="doc-1")] + + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="disabled", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=None, + inputs={}, + ) + assert mapping is None + assert condition is None + + automatic_filters = [{"condition": "contains", "metadata_name": "author", "value": "Alice"}] + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query), + patch.object(retrieval, "_automatic_metadata_filter_func", return_value=automatic_filters), + ): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="automatic", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=AppMetadataFilteringCondition(logical_operator="or", conditions=[]), + inputs={}, + ) + assert mapping == {"d1": ["doc-1"]} + assert condition is not None + assert condition.logical_operator == "or" + + manual_conditions = AppMetadataFilteringCondition( + logical_operator="and", + conditions=[AppCondition(name="author", comparison_operator="contains", value="{{name}}")], + ) + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + mapping, condition = retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="manual", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=manual_conditions, + inputs={"name": "Alice"}, + ) + assert mapping == {"d1": ["doc-1"]} + assert condition is not None + assert condition.conditions[0].value == "Alice" + + with patch("core.rag.retrieval.dataset_retrieval.db.session.query", return_value=db_query): + with pytest.raises(ValueError, match="Invalid metadata filtering mode"): + retrieval.get_metadata_filter_condition( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="u1", + metadata_filtering_mode="unsupported", + metadata_model_config=AppModelConfig(provider="openai", name="gpt", mode="chat"), + metadata_filtering_conditions=None, + inputs={}, + ) + + def test_get_available_datasets(self, retrieval: DatasetRetrieval) -> None: + session = Mock() + subquery_query = Mock() + subquery_query.where.return_value = subquery_query + subquery_query.group_by.return_value = subquery_query + subquery_query.having.return_value = subquery_query + subquery_query.subquery.return_value = SimpleNamespace( + c=SimpleNamespace( + dataset_id=column("dataset_id"), available_document_count=column("available_document_count") + ) + ) + + dataset_query = Mock() + dataset_query.outerjoin.return_value = dataset_query + dataset_query.where.return_value = dataset_query + dataset_query.all.return_value = [SimpleNamespace(id="d1"), None, SimpleNamespace(id="d2")] + session.query.side_effect = [subquery_query, dataset_query] + + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with patch("core.rag.retrieval.dataset_retrieval.session_factory.create_session", return_value=session_ctx): + available = retrieval._get_available_datasets("tenant-1", ["d1", "d2"]) + + assert [dataset.id for dataset in available] == ["d1", "d2"] + + def test_check_knowledge_rate_limit(self, retrieval: DatasetRetrieval) -> None: + with ( + patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit, + patch("core.rag.retrieval.dataset_retrieval.redis_client") as mock_redis, + patch("core.rag.retrieval.dataset_retrieval.time.time", return_value=100.0), + ): + mock_limit.return_value = SimpleNamespace(enabled=True, limit=2, subscription_plan="pro") + mock_redis.zcard.return_value = 1 + retrieval._check_knowledge_rate_limit("tenant-1") + mock_redis.zadd.assert_called_once() + + session = Mock() + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit, + patch("core.rag.retrieval.dataset_retrieval.redis_client") as mock_redis, + patch("core.rag.retrieval.dataset_retrieval.time.time", return_value=100.0), + patch("core.rag.retrieval.dataset_retrieval.session_factory.create_session", return_value=session_ctx), + ): + mock_limit.return_value = SimpleNamespace(enabled=True, limit=1, subscription_plan="pro") + mock_redis.zcard.return_value = 2 + with pytest.raises(exc.RateLimitExceededError): + retrieval._check_knowledge_rate_limit("tenant-1") + session.add.assert_called_once() + + with patch("core.rag.retrieval.dataset_retrieval.FeatureService.get_knowledge_rate_limit") as mock_limit: + mock_limit.return_value = SimpleNamespace(enabled=False) + retrieval._check_knowledge_rate_limit("tenant-1") + + +def _doc( + provider: str = "dify", + content: str = "content", + score: float = 0.9, + dataset_id: str = "dataset-1", + document_id: str = "document-1", + doc_id: str = "node-1", + extra: dict | None = None, +) -> Document: + metadata = { + "score": score, + "dataset_id": dataset_id, + "document_id": document_id, + "doc_id": doc_id, + } + if extra: + metadata.update(extra) + return Document(page_content=content, metadata=metadata, provider=provider) + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._alive = False + + def start(self) -> None: + self._alive = True + if self._target: + self._target(**self._kwargs) + self._alive = False + + def join(self, timeout=None) -> None: + return None + + def is_alive(self) -> bool: + return self._alive + + +class _JoinDrivenThread: + def __init__(self, target=None, kwargs=None): + self._target = target + self._kwargs = kwargs or {} + self._started = False + self._alive = False + + def start(self) -> None: + self._started = True + self._alive = True + + def join(self, timeout=None) -> None: + if self._started and self._alive and self._target: + self._target(**self._kwargs) + self._alive = False + + def is_alive(self) -> bool: + return self._alive + + +@contextmanager +def _timer(): + yield {"cost": 1} + + +class TestKnowledgeRetrievalCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_returns_empty_when_query_missing(self, retrieval: DatasetRetrieval) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["d1"], + query=None, + retrieval_mode="multiple", + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + ): + assert retrieval.knowledge_retrieval(request) == [] + + def test_raises_when_metadata_model_config_missing(self, retrieval: DatasetRetrieval) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["d1"], + query="query", + retrieval_mode="multiple", + metadata_filtering_mode="automatic", + metadata_model_config=None, + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + ): + with pytest.raises(ValueError, match="metadata_model_config is required"): + retrieval.knowledge_retrieval(request) + + @pytest.mark.parametrize( + ("status", "error_cls"), + [ + (ModelStatus.NO_CONFIGURE, "ModelCredentialsNotInitializedError"), + (ModelStatus.NO_PERMISSION, "ModelNotSupportedError"), + (ModelStatus.QUOTA_EXCEEDED, "ModelQuotaExceededError"), + ], + ) + def test_single_mode_raises_for_model_status( + self, + retrieval: DatasetRetrieval, + status: ModelStatus, + error_cls: str, + ) -> None: + request = KnowledgeRetrievalRequest( + tenant_id="tenant-1", + user_id="user-1", + app_id="app-1", + user_from="workflow", + dataset_ids=["dataset-1"], + query="python", + retrieval_mode="single", + model_provider="openai", + model_name="gpt-4", + ) + provider_model_bundle = Mock() + provider_model_bundle.configuration.get_provider_model.return_value = SimpleNamespace(status=status) + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = Mock() + model_instance = SimpleNamespace( + provider_model_bundle=provider_model_bundle, + model_type_instance=model_type_instance, + credentials={}, + ) + with ( + patch.object(retrieval, "_check_knowledge_rate_limit"), + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="dataset-1")]), + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + ): + mock_model_manager.return_value.get_model_instance.return_value = model_instance + with pytest.raises(Exception) as exc_info: + retrieval.knowledge_retrieval(request) + assert error_cls in type(exc_info.value).__name__ + + +class TestRetrieveCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def _build_model_config(self, features: list[ModelFeature] | None = None): + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = SimpleNamespace(features=features or []) + provider_bundle = SimpleNamespace(model_type_instance=model_type_instance) + return ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt-4", + model_schema=Mock(), + mode="chat", + provider_model_bundle=provider_bundle, + credentials={}, + parameters={}, + stop=[], + ) + + def test_returns_none_when_dataset_ids_empty(self, retrieval: DatasetRetrieval) -> None: + config = DatasetEntity( + dataset_ids=[], + retrieve_config=DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + ), + ) + result = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=self._build_model_config(), + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert result == (None, []) + + def test_returns_none_when_model_schema_missing(self, retrieval: DatasetRetrieval) -> None: + config = DatasetEntity( + dataset_ids=["d1"], + retrieve_config=DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + ), + ) + model_config = self._build_model_config() + model_config.provider_model_bundle.model_type_instance.get_model_schema.return_value = None + with patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager: + mock_model_manager.return_value.get_model_instance.return_value = Mock() + result = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert result == (None, []) + + def test_single_strategy_with_external_documents(self, retrieval: DatasetRetrieval) -> None: + retrieve_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE, + metadata_filtering_mode="disabled", + ) + config = DatasetEntity(dataset_ids=["d1"], retrieve_config=retrieve_config) + model_config = self._build_model_config() + external_doc = _doc( + provider="external", + content="external content", + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"title": "External", "dataset_name": "External DS"}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + patch.object(retrieval, "get_metadata_filter_condition", return_value=(None, None)), + patch.object(retrieval, "single_retrieve", return_value=[external_doc]), + ): + mock_model_manager.return_value.get_model_instance.return_value = Mock() + context, files = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.WEB_APP, + show_retrieve_source=False, + hit_callback=Mock(), + message_id="m1", + ) + assert context == "external content" + assert files == [] + + def test_multiple_strategy_with_vision_and_source_details(self, retrieval: DatasetRetrieval) -> None: + retrieve_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + top_k=4, + score_threshold=0.1, + rerank_mode="reranking_model", + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v3"}, + reranking_enabled=True, + metadata_filtering_mode="disabled", + ) + config = DatasetEntity(dataset_ids=["d1"], retrieve_config=retrieve_config) + model_config = self._build_model_config(features=[ModelFeature.TOOL_CALL]) + external_doc = _doc( + provider="external", + content="external body", + score=0.8, + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"title": "External Title", "dataset_name": "External DS"}, + ) + dify_doc = _doc( + provider="dify", + content="dify body", + score=0.9, + dataset_id="d1", + document_id="doc-1", + doc_id="node-1", + ) + record = SimpleNamespace( + segment=SimpleNamespace( + id="segment-1", + dataset_id="d1", + document_id="doc-1", + tenant_id="tenant-1", + hit_count=3, + word_count=11, + position=1, + index_node_hash="hash-1", + content="segment content", + answer="segment answer", + get_sign_content=lambda: "segment content", + ), + score=0.9, + summary="short summary", + files=None, + ) + dataset_item = SimpleNamespace(id="d1", name="Dataset One") + document_item = SimpleNamespace( + id="doc-1", + name="Document One", + data_source_type="upload_file", + doc_metadata={"lang": "en"}, + ) + upload_file = SimpleNamespace( + id="file-1", + name="image", + extension="png", + mime_type="image/png", + source_url="https://example.com/img.png", + size=123, + key="k1", + ) + execute_attachments = SimpleNamespace(all=lambda: [(SimpleNamespace(), upload_file)]) + execute_docs = SimpleNamespace(scalars=lambda: SimpleNamespace(all=lambda: [document_item])) + execute_datasets = SimpleNamespace(scalars=lambda: SimpleNamespace(all=lambda: [dataset_item])) + hit_callback = Mock() + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelManager") as mock_model_manager, + patch.object(retrieval, "_get_available_datasets", return_value=[SimpleNamespace(id="d1")]), + patch.object(retrieval, "get_metadata_filter_condition", return_value=(None, None)), + patch.object(retrieval, "multiple_retrieve", return_value=[external_doc, dify_doc]), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.format_retrieval_documents", + return_value=[record], + ), + patch("core.rag.retrieval.dataset_retrieval.sign_upload_file", return_value="https://signed"), + patch("core.rag.retrieval.dataset_retrieval.db.session.execute") as mock_execute, + ): + mock_model_manager.return_value.get_model_instance.return_value = Mock() + mock_execute.side_effect = [execute_attachments, execute_docs, execute_datasets] + context, files = retrieval.retrieve( + app_id="app-1", + user_id="user-1", + tenant_id="tenant-1", + model_config=model_config, + config=config, + query="python", + invoke_from=InvokeFrom.DEBUGGER, + show_retrieve_source=True, + hit_callback=hit_callback, + message_id="m1", + vision_enabled=True, + ) + + assert "short summary" in (context or "") + assert "question:segment content answer:segment answer" in (context or "") + assert len(files or []) == 1 + hit_callback.return_retriever_resource_info.assert_called_once() + + +class TestSingleAndMultipleRetrieveCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_single_retrieve_external_path(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="External DS", + description=None, + provider="external", + tenant_id="tenant-1", + retrieval_model={"top_k": 2}, + indexing_technique="high_quality", + ) + app = Flask(__name__) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2}) + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + patch( + "core.rag.retrieval.dataset_retrieval.ExternalDatasetService.fetch_external_knowledge_retrieval" + ) as mock_external, + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_on_retrieval_end") as mock_end, + patch.object(retrieval, "_on_query"), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", usage) + mock_external.return_value = [ + {"content": "ext result", "metadata": {"k": "v"}, "score": 0.9, "title": "Ext Doc"} + ] + result = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + message_id="m1", + ) + + assert len(result) == 1 + assert result[0].provider == "external" + mock_end.assert_called_once() + assert retrieval.llm_usage.total_tokens == 2 + + def test_single_retrieve_dify_path_and_filters(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="Internal DS", + description="dataset desc", + provider="dify", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "cohere", "reranking_model_name": "rerank"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {}}, + "top_k": 3, + "score_threshold_enabled": True, + "score_threshold": 0.2, + }, + ) + app = Flask(__name__) + usage = LLMUsage.from_metadata({"prompt_tokens": 1, "completion_tokens": 0, "total_tokens": 1}) + result_doc = _doc(provider="dify", score=0.7, dataset_id="ds-1", document_id="doc-1", doc_id="node-1") + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.FunctionCallMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.retrieve", return_value=[result_doc] + ) as mock_retrieve, + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_on_retrieval_end"), + patch.object(retrieval, "_on_query"), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", usage) + results = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.ROUTER, + metadata_filter_document_ids={"ds-1": ["doc-1"]}, + metadata_condition=SimpleNamespace(), + ) + + assert results == [result_doc] + assert mock_retrieve.call_args.kwargs["document_ids_filter"] == ["doc-1"] + assert retrieval.llm_usage.total_tokens == 1 + + def test_single_retrieve_returns_empty_when_no_dataset_selected(self, retrieval: DatasetRetrieval) -> None: + with patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls: + mock_router_cls.return_value.invoke.return_value = (None, LLMUsage.empty_usage()) + results = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[ + SimpleNamespace(id="ds-1", name="DS", description=None), + ], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + ) + assert results == [] + + def test_single_retrieve_respects_metadata_filter_shortcuts(self, retrieval: DatasetRetrieval) -> None: + dataset = SimpleNamespace( + id="ds-1", + name="Internal DS", + description="desc", + provider="dify", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={"top_k": 2, "search_method": "semantic_search", "reranking_enable": False}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.ReactMultiDatasetRouter") as mock_router_cls, + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset), + ): + mock_router_cls.return_value.invoke.return_value = ("ds-1", LLMUsage.empty_usage()) + no_filter = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + metadata_filter_document_ids=None, + metadata_condition=SimpleNamespace(), + ) + missing_doc_ids = retrieval.single_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + query="python", + available_datasets=[dataset], + model_instance=Mock(), + model_config=Mock(), + planning_strategy=PlanningStrategy.REACT_ROUTER, + metadata_filter_document_ids={"other-ds": ["x"]}, + metadata_condition=None, + ) + assert no_filter == [] + assert missing_doc_ids == [] + + def test_multiple_retrieve_validation_paths(self, retrieval: DatasetRetrieval) -> None: + assert ( + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=[], + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="reranking_model", + ) + == [] + ) + + mixed = [ + SimpleNamespace(id="d1", indexing_technique="high_quality"), + SimpleNamespace(id="d2", indexing_technique="economy"), + ] + with pytest.raises(ValueError, match="different indexing technique"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=mixed, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="weighted_score", + reranking_enable=False, + ) + + high_quality_mismatch = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + SimpleNamespace( + id="d2", + indexing_technique="high_quality", + embedding_model="model-b", + embedding_model_provider="provider-b", + ), + ] + with pytest.raises(ValueError, match="different embedding model"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=high_quality_mismatch, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode=RerankMode.WEIGHTED_SCORE, + reranking_enable=True, + ) + + def test_multiple_retrieve_threads_and_dedup(self, retrieval: DatasetRetrieval) -> None: + datasets = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + SimpleNamespace( + id="d2", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ), + ] + doc_a = _doc(provider="dify", score=0.8, dataset_id="d1", document_id="doc-1", doc_id="dup") + doc_b = _doc(provider="dify", score=0.7, dataset_id="d2", document_id="doc-2", doc_id="dup") + doc_external = _doc( + provider="external", + score=0.9, + dataset_id="ext-ds", + document_id="ext-doc", + doc_id="ext-node", + extra={"dataset_name": "Ext", "title": "Ext"}, + ) + app = Flask(__name__) + weights: WeightsDict = { + "vector_setting": {"vector_weight": 0.5, "embedding_provider_name": "", "embedding_model_name": ""}, + "keyword_setting": {"keyword_weight": 0.5}, + } + + def fake_multiple_thread(**kwargs): + if kwargs["query"]: + kwargs["all_documents"].extend([doc_a, doc_b]) + if kwargs["attachment_id"]: + kwargs["all_documents"].append(doc_external) + + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.measure_time", _timer), + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_multiple_retrieve_thread", side_effect=fake_multiple_thread), + patch.object(retrieval, "_on_query") as mock_on_query, + patch.object(retrieval, "_on_retrieval_end") as mock_end, + ): + result = retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=datasets, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode=RerankMode.WEIGHTED_SCORE, + reranking_enable=True, + weights=weights, + attachment_ids=["att-1"], + message_id="m1", + ) + + assert len(result) == 2 + assert any(doc.provider == "external" for doc in result) + assert weights["vector_setting"]["embedding_provider_name"] == "provider-a" + assert weights["vector_setting"]["embedding_model_name"] == "model-a" + mock_on_query.assert_called_once() + mock_end.assert_called_once() + + def test_multiple_retrieve_propagates_thread_exception(self, retrieval: DatasetRetrieval) -> None: + datasets = [ + SimpleNamespace( + id="d1", + indexing_technique="high_quality", + embedding_model="model-a", + embedding_model_provider="provider-a", + ) + ] + app = Flask(__name__) + + def failing_thread(**kwargs): + kwargs["thread_exceptions"].append(RuntimeError("thread boom")) + + with app.app_context(): + with ( + patch("core.rag.retrieval.dataset_retrieval.measure_time", _timer), + patch("core.rag.retrieval.dataset_retrieval.threading.Thread", _ImmediateThread), + patch.object(retrieval, "_multiple_retrieve_thread", side_effect=failing_thread), + ): + with pytest.raises(RuntimeError, match="thread boom"): + retrieval.multiple_retrieve( + app_id="app-1", + tenant_id="tenant-1", + user_id="user-1", + user_from="workflow", + available_datasets=datasets, + query="python", + top_k=2, + score_threshold=0.0, + reranking_mode="reranking_model", + ) + + +class TestInternalHooksCoverage: + @pytest.fixture + def retrieval(self) -> DatasetRetrieval: + return DatasetRetrieval() + + def test_on_retrieval_end_without_dify_documents(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + with patch.object(retrieval, "_send_trace_task") as mock_trace: + retrieval._on_retrieval_end( + flask_app=app, + documents=[_doc(provider="external")], + message_id="m1", + timer={"cost": 1}, + ) + mock_trace.assert_called_once() + + def test_on_retrieval_end_dify_without_document_ids(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + doc = Document(page_content="x", metadata={"doc_id": "n1"}, provider="dify") + with ( + patch("core.rag.retrieval.dataset_retrieval.db", SimpleNamespace(engine=Mock())), + patch.object(retrieval, "_send_trace_task") as mock_trace, + ): + retrieval._on_retrieval_end(flask_app=app, documents=[doc], message_id="m1", timer={"cost": 1}) + mock_trace.assert_called_once() + + def test_on_retrieval_end_updates_segments_for_text_and_image(self, retrieval: DatasetRetrieval) -> None: + app = Flask(__name__) + docs = [ + _doc(provider="dify", document_id="doc-a", doc_id="idx-a", extra={"doc_type": "text"}), + _doc(provider="dify", document_id="doc-b", doc_id="att-b", extra={"doc_type": DocType.IMAGE}), + _doc(provider="dify", document_id="doc-c", doc_id="idx-c", extra={"doc_type": "text"}), + _doc(provider="dify", document_id="doc-d", doc_id="att-d", extra={"doc_type": DocType.IMAGE}), + ] + dataset_docs = [ + SimpleNamespace(id="doc-a", doc_form=IndexStructureType.PARENT_CHILD_INDEX), + SimpleNamespace(id="doc-b", doc_form=IndexStructureType.PARENT_CHILD_INDEX), + SimpleNamespace(id="doc-c", doc_form="qa_model"), + SimpleNamespace(id="doc-d", doc_form="qa_model"), + ] + child_chunks = [SimpleNamespace(index_node_id="idx-a", segment_id="seg-a")] + segments = [SimpleNamespace(index_node_id="idx-c", id="seg-c")] + bindings = [SimpleNamespace(segment_id="seg-b"), SimpleNamespace(segment_id="seg-d")] + + def _scalars(items): + result = Mock() + result.all.return_value = items + return result + + session = Mock() + session.scalars.side_effect = [ + _scalars(dataset_docs), + _scalars(child_chunks), + _scalars(segments), + _scalars(bindings), + ] + query = Mock() + query.where.return_value = query + session.query.return_value = query + session_ctx = MagicMock() + session_ctx.__enter__.return_value = session + session_ctx.__exit__.return_value = False + + with ( + patch("core.rag.retrieval.dataset_retrieval.db", SimpleNamespace(engine=Mock())), + patch("core.rag.retrieval.dataset_retrieval.Session", return_value=session_ctx), + patch.object(retrieval, "_send_trace_task") as mock_trace, + ): + retrieval._on_retrieval_end(flask_app=app, documents=docs, message_id="m1", timer={"cost": 1}) + + query.update.assert_called_once() + session.commit.assert_called_once() + mock_trace.assert_called_once() + + def test_retriever_variants(self, retrieval: DatasetRetrieval) -> None: + flask_app = SimpleNamespace(app_context=lambda: nullcontext()) + all_documents: list[Document] = [] + + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=None): + assert ( + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="d1", + query="python", + top_k=1, + all_documents=all_documents, + ) + == [] + ) + + external_dataset = SimpleNamespace( + id="ext-ds", + name="External", + provider="external", + tenant_id="tenant-1", + retrieval_model={"top_k": 2}, + indexing_technique="high_quality", + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=external_dataset), + patch( + "core.rag.retrieval.dataset_retrieval.ExternalDatasetService.fetch_external_knowledge_retrieval" + ) as mock_external, + ): + mock_external.return_value = [{"content": "e", "metadata": {}, "score": 0.8, "title": "Ext"}] + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="ext-ds", + query="python", + top_k=1, + all_documents=all_documents, + ) + + economy_dataset = SimpleNamespace( + id="eco-ds", + provider="dify", + retrieval_model={"top_k": 1}, + indexing_technique="economy", + ) + high_dataset = SimpleNamespace( + id="hq-ds", + provider="dify", + retrieval_model={ + "search_method": "semantic_search", + "top_k": 4, + "score_threshold": 0.3, + "score_threshold_enabled": True, + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "x", "reranking_model_name": "y"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {}}, + }, + indexing_technique="high_quality", + ) + with ( + patch( + "core.rag.retrieval.dataset_retrieval.db.session.scalar", side_effect=[economy_dataset, high_dataset] + ), + patch( + "core.rag.retrieval.dataset_retrieval.RetrievalService.retrieve", return_value=[_doc(provider="dify")] + ) as mock_retrieve, + ): + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="eco-ds", + query="python", + top_k=2, + all_documents=all_documents, + ) + retrieval._retriever( + flask_app=flask_app, # type: ignore[arg-type] + dataset_id="hq-ds", + query="python", + top_k=2, + all_documents=all_documents, + attachment_ids=["att-1"], + ) + assert mock_retrieve.call_count == 2 + assert len(all_documents) >= 3 + + def test_to_dataset_retriever_tool_paths(self, retrieval: DatasetRetrieval) -> None: + dataset_skip_zero = SimpleNamespace(id="d1", provider="dify", available_document_count=0) + dataset_ok_single = SimpleNamespace( + id="d2", + provider="dify", + available_document_count=2, + retrieval_model={"top_k": 2, "score_threshold_enabled": True, "score_threshold": 0.1}, + ) + single_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE, + metadata_filtering_mode="disabled", + ) + with ( + patch( + "core.rag.retrieval.dataset_retrieval.db.session.scalar", + side_effect=[None, dataset_skip_zero, dataset_ok_single], + ), + patch( + "core.tools.utils.dataset_retriever.dataset_retriever_tool.DatasetRetrieverTool.from_dataset", + return_value="single-tool", + ) as mock_single_tool, + ): + single_tools = retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["missing", "d1", "d2"], + retrieve_config=single_config, + return_resource=True, + invoke_from=InvokeFrom.WEB_APP, + hit_callback=Mock(), + user_id="user-1", + inputs={"k": "v"}, + ) + + assert single_tools == ["single-tool"] + mock_single_tool.assert_called_once() + + multiple_config_missing = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + metadata_filtering_mode="disabled", + reranking_model=None, + ) + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset_ok_single): + with pytest.raises(ValueError, match="Reranking model is required"): + retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["d2"], + retrieve_config=multiple_config_missing, + return_resource=True, + invoke_from=InvokeFrom.WEB_APP, + hit_callback=Mock(), + user_id="user-1", + inputs={}, + ) + + multiple_config = DatasetRetrieveConfigEntity( + retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE, + metadata_filtering_mode="disabled", + top_k=3, + score_threshold=0.2, + reranking_model={"reranking_provider_name": "cohere", "reranking_model_name": "rerank-v3"}, + ) + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalar", return_value=dataset_ok_single), + patch( + "core.tools.utils.dataset_retriever.dataset_multi_retriever_tool.DatasetMultiRetrieverTool.from_dataset", + return_value="multi-tool", + ) as mock_multi_tool, + ): + multi_tools = retrieval.to_dataset_retriever_tool( + tenant_id="tenant-1", + dataset_ids=["d2"], + retrieve_config=multiple_config, + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="user-1", + inputs={}, + ) + assert multi_tools == ["multi-tool"] + mock_multi_tool.assert_called_once() + + def test_additional_small_branches(self, retrieval: DatasetRetrieval) -> None: + keyword_handler = Mock() + keyword_handler.extract_keywords.side_effect = [[], []] + doc = Document(page_content="doc", metadata={"doc_id": "1"}, provider="dify") + with patch("core.rag.retrieval.dataset_retrieval.JiebaKeywordTableHandler", return_value=keyword_handler): + ranked = retrieval.calculate_keyword_score("query", [doc], top_k=1) + assert len(ranked) == 1 + assert ranked[0].metadata.get("score") == 0.0 + + with patch("core.rag.retrieval.dataset_retrieval.db.session.scalars") as mock_scalars: + mock_scalars.return_value.all.return_value = [] + with pytest.raises(ValueError): + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="user-1", + metadata_model_config=None, # type: ignore[arg-type] + ) + + session_scalars = Mock() + session_scalars.all.return_value = [SimpleNamespace(name="author")] + with ( + patch("core.rag.retrieval.dataset_retrieval.db.session.scalars", return_value=session_scalars), + patch.object(retrieval, "_fetch_model_config", return_value=(Mock(), Mock())), + patch.object(retrieval, "_get_prompt_template", return_value=(["prompt"], [])), + patch.object(retrieval, "_record_usage"), + ): + model_instance = Mock() + model_instance.invoke_llm.side_effect = RuntimeError("nope") + with patch.object(retrieval, "_fetch_model_config", return_value=(model_instance, Mock())): + assert ( + retrieval._automatic_metadata_filter_func( + dataset_ids=["d1"], + query="python", + tenant_id="tenant-1", + user_id="user-1", + metadata_model_config=WorkflowModelConfig(provider="openai", name="gpt", mode="chat"), + ) + is None + ) + + with ( + patch("core.rag.retrieval.dataset_retrieval.ModelMode", return_value=object()), + patch("core.rag.retrieval.dataset_retrieval.AdvancedPromptTransform"), + ): + with pytest.raises(ValueError, match="not support"): + retrieval._get_prompt_template( + model_config=ModelConfigWithCredentialsEntity.model_construct( + provider="openai", + model="gpt", + model_schema=Mock(), + mode="chat", + provider_model_bundle=Mock(), + credentials={}, + parameters={}, + stop=[], + ), + mode="chat", + metadata_fields=[], + query="q", + ) diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py deleted file mode 100644 index 07d6e51e4b..0000000000 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py +++ /dev/null @@ -1,873 +0,0 @@ -""" -Unit tests for DatasetRetrieval.process_metadata_filter_func. - -This module provides comprehensive test coverage for the process_metadata_filter_func -method in the DatasetRetrieval class, which is responsible for building SQLAlchemy -filter expressions based on metadata filtering conditions. - -Conditions Tested: -================== -1. **String Conditions**: contains, not contains, start with, end with -2. **Equality Conditions**: is / =, is not / ≠ -3. **Null Conditions**: empty, not empty -4. **Numeric Comparisons**: before / <, after / >, ≤ / <=, ≥ / >= -5. **List Conditions**: in -6. **Edge Cases**: None values, different data types (str, int, float) - -Test Architecture: -================== -- Direct instantiation of DatasetRetrieval -- Mocking of DatasetDocument model attributes -- Verification of SQLAlchemy filter expressions -- Follows Arrange-Act-Assert (AAA) pattern - -Running Tests: -============== - # Run all tests in this module - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py -v - - # Run a specific test - uv run --project api pytest \ - api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_metadata_filter.py::\ -TestProcessMetadataFilterFunc::test_contains_condition -v -""" - -from unittest.mock import MagicMock - -import pytest - -from core.rag.retrieval.dataset_retrieval import DatasetRetrieval - - -class TestProcessMetadataFilterFunc: - """ - Comprehensive test suite for process_metadata_filter_func method. - - This test class validates all metadata filtering conditions supported by - the DatasetRetrieval class, including string operations, numeric comparisons, - null checks, and list operations. - - Method Signature: - ================== - def process_metadata_filter_func( - self, sequence: int, condition: str, metadata_name: str, value: Any | None, filters: list - ) -> list: - - The method builds SQLAlchemy filter expressions by: - 1. Validating value is not None (except for empty/not empty conditions) - 2. Using DatasetDocument.doc_metadata JSON field operations - 3. Adding appropriate SQLAlchemy expressions to the filters list - 4. Returning the updated filters list - - Mocking Strategy: - ================== - - Mock DatasetDocument.doc_metadata to avoid database dependencies - - Verify filter expressions are created correctly - - Test with various data types (str, int, float, list) - """ - - @pytest.fixture - def retrieval(self): - """ - Create a DatasetRetrieval instance for testing. - - Returns: - DatasetRetrieval: Instance to test process_metadata_filter_func - """ - return DatasetRetrieval() - - @pytest.fixture - def mock_doc_metadata(self): - """ - Mock the DatasetDocument.doc_metadata JSON field. - - The method uses DatasetDocument.doc_metadata[metadata_name] to access - JSON fields. We mock this to avoid database dependencies. - - Returns: - Mock: Mocked doc_metadata attribute - """ - mock_metadata_field = MagicMock() - - # Create mock for string access - mock_string_access = MagicMock() - mock_string_access.like = MagicMock() - mock_string_access.notlike = MagicMock() - mock_string_access.__eq__ = MagicMock(return_value=MagicMock()) - mock_string_access.__ne__ = MagicMock(return_value=MagicMock()) - mock_string_access.in_ = MagicMock(return_value=MagicMock()) - - # Create mock for float access (for numeric comparisons) - mock_float_access = MagicMock() - mock_float_access.__eq__ = MagicMock(return_value=MagicMock()) - mock_float_access.__ne__ = MagicMock(return_value=MagicMock()) - mock_float_access.__lt__ = MagicMock(return_value=MagicMock()) - mock_float_access.__gt__ = MagicMock(return_value=MagicMock()) - mock_float_access.__le__ = MagicMock(return_value=MagicMock()) - mock_float_access.__ge__ = MagicMock(return_value=MagicMock()) - - # Create mock for null checks - mock_null_access = MagicMock() - mock_null_access.is_ = MagicMock(return_value=MagicMock()) - mock_null_access.isnot = MagicMock(return_value=MagicMock()) - - # Setup __getitem__ to return appropriate mock based on usage - def getitem_side_effect(name): - if name in ["author", "title", "category"]: - return mock_string_access - elif name in ["year", "price", "rating"]: - return mock_float_access - else: - return mock_string_access - - mock_metadata_field.__getitem__ = MagicMock(side_effect=getitem_side_effect) - mock_metadata_field.as_string.return_value = mock_string_access - mock_metadata_field.as_float.return_value = mock_float_access - mock_metadata_field[metadata_name:str].is_ = mock_null_access.is_ - mock_metadata_field[metadata_name:str].isnot = mock_null_access.isnot - - return mock_metadata_field - - # ==================== String Condition Tests ==================== - - def test_contains_condition_string_value(self, retrieval): - """ - Test 'contains' condition with string value. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses %value% syntax - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "John" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_contains_condition(self, retrieval): - """ - Test 'not contains' condition. - - Verifies: - - Filters list is populated with NOT LIKE expression - - Pattern matching uses %value% syntax with negation - """ - filters = [] - sequence = 0 - condition = "not contains" - metadata_name = "title" - value = "banned" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_start_with_condition(self, retrieval): - """ - Test 'start with' condition. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses value% syntax - """ - filters = [] - sequence = 0 - condition = "start with" - metadata_name = "category" - value = "tech" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_end_with_condition(self, retrieval): - """ - Test 'end with' condition. - - Verifies: - - Filters list is populated with LIKE expression - - Pattern matching uses %value syntax - """ - filters = [] - sequence = 0 - condition = "end with" - metadata_name = "filename" - value = ".pdf" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Equality Condition Tests ==================== - - def test_is_condition_with_string_value(self, retrieval): - """ - Test 'is' (=) condition with string value. - - Verifies: - - Filters list is populated with equality expression - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "author" - value = "Jane Doe" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_equals_condition_with_string_value(self, retrieval): - """ - Test '=' condition with string value. - - Verifies: - - Same behavior as 'is' condition - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "=" - metadata_name = "category" - value = "technology" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_condition_with_int_value(self, retrieval): - """ - Test 'is' condition with integer value. - - Verifies: - - Numeric comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "year" - value = 2023 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_condition_with_float_value(self, retrieval): - """ - Test 'is' condition with float value. - - Verifies: - - Numeric comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "price" - value = 19.99 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_not_condition_with_string_value(self, retrieval): - """ - Test 'is not' (≠) condition with string value. - - Verifies: - - Filters list is populated with inequality expression - - String comparison is used - """ - filters = [] - sequence = 0 - condition = "is not" - metadata_name = "author" - value = "Unknown" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_equals_condition(self, retrieval): - """ - Test '≠' condition with string value. - - Verifies: - - Same behavior as 'is not' condition - - Inequality expression is used - """ - filters = [] - sequence = 0 - condition = "≠" - metadata_name = "category" - value = "archived" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_is_not_condition_with_numeric_value(self, retrieval): - """ - Test 'is not' condition with numeric value. - - Verifies: - - Numeric inequality comparison is used - - as_float() is called on the metadata field - """ - filters = [] - sequence = 0 - condition = "is not" - metadata_name = "year" - value = 2000 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Null Condition Tests ==================== - - def test_empty_condition(self, retrieval): - """ - Test 'empty' condition (null check). - - Verifies: - - Filters list is populated with IS NULL expression - - Value can be None for this condition - """ - filters = [] - sequence = 0 - condition = "empty" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_not_empty_condition(self, retrieval): - """ - Test 'not empty' condition (not null check). - - Verifies: - - Filters list is populated with IS NOT NULL expression - - Value can be None for this condition - """ - filters = [] - sequence = 0 - condition = "not empty" - metadata_name = "description" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Numeric Comparison Tests ==================== - - def test_before_condition(self, retrieval): - """ - Test 'before' (<) condition. - - Verifies: - - Filters list is populated with less than expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "before" - metadata_name = "year" - value = 2020 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_condition(self, retrieval): - """ - Test '<' condition. - - Verifies: - - Same behavior as 'before' condition - - Less than expression is used - """ - filters = [] - sequence = 0 - condition = "<" - metadata_name = "price" - value = 100.0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_after_condition(self, retrieval): - """ - Test 'after' (>) condition. - - Verifies: - - Filters list is populated with greater than expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "after" - metadata_name = "year" - value = 2020 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_condition(self, retrieval): - """ - Test '>' condition. - - Verifies: - - Same behavior as 'after' condition - - Greater than expression is used - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "rating" - value = 4.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_or_equal_condition_unicode(self, retrieval): - """ - Test '≤' condition. - - Verifies: - - Filters list is populated with less than or equal expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "≤" - metadata_name = "price" - value = 50.0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_less_than_or_equal_condition_ascii(self, retrieval): - """ - Test '<=' condition. - - Verifies: - - Same behavior as '≤' condition - - Less than or equal expression is used - """ - filters = [] - sequence = 0 - condition = "<=" - metadata_name = "year" - value = 2023 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_or_equal_condition_unicode(self, retrieval): - """ - Test '≥' condition. - - Verifies: - - Filters list is populated with greater than or equal expression - - Numeric comparison is used - """ - filters = [] - sequence = 0 - condition = "≥" - metadata_name = "rating" - value = 3.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_greater_than_or_equal_condition_ascii(self, retrieval): - """ - Test '>=' condition. - - Verifies: - - Same behavior as '≥' condition - - Greater than or equal expression is used - """ - filters = [] - sequence = 0 - condition = ">=" - metadata_name = "year" - value = 2000 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== List/In Condition Tests ==================== - - def test_in_condition_with_comma_separated_string(self, retrieval): - """ - Test 'in' condition with comma-separated string value. - - Verifies: - - String is split into list - - Whitespace is trimmed from each value - - IN expression is created - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "tech, science, AI " - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_list_value(self, retrieval): - """ - Test 'in' condition with list value. - - Verifies: - - List is processed correctly - - None values are filtered out - - IN expression is created with valid values - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "tags" - value = ["python", "javascript", None, "golang"] - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_tuple_value(self, retrieval): - """ - Test 'in' condition with tuple value. - - Verifies: - - Tuple is processed like a list - - IN expression is created - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = ("tech", "science", "ai") - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_empty_string(self, retrieval): - """ - Test 'in' condition with empty string value. - - Verifies: - - Empty string results in literal(False) filter - - No valid values to match - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - # Verify it's a literal(False) expression - # This is a bit tricky to test without access to the actual expression - - def test_in_condition_with_only_whitespace(self, retrieval): - """ - Test 'in' condition with whitespace-only string value. - - Verifies: - - Whitespace-only string results in literal(False) filter - - All values are stripped and filtered out - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = " , , " - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_in_condition_with_single_string(self, retrieval): - """ - Test 'in' condition with single non-comma string. - - Verifies: - - Single string is treated as single-item list - - IN expression is created with one value - """ - filters = [] - sequence = 0 - condition = "in" - metadata_name = "category" - value = "technology" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - # ==================== Edge Case Tests ==================== - - def test_none_value_with_non_empty_condition(self, retrieval): - """ - Test None value with conditions that require value. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values (except empty/not empty) - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 # No filter added - - def test_none_value_with_equals_condition(self, retrieval): - """ - Test None value with 'is' (=) condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values - """ - filters = [] - sequence = 0 - condition = "is" - metadata_name = "author" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_none_value_with_numeric_condition(self, retrieval): - """ - Test None value with numeric comparison condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for None values - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "year" - value = None - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_existing_filters_preserved(self, retrieval): - """ - Test that existing filters are preserved. - - Verifies: - - Existing filters in the list are not removed - - New filters are appended to the list - """ - existing_filter = MagicMock() - filters = [existing_filter] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "test" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 2 - assert filters[0] == existing_filter - - def test_multiple_filters_accumulated(self, retrieval): - """ - Test multiple calls to accumulate filters. - - Verifies: - - Each call adds a new filter to the list - - All filters are preserved across calls - """ - filters = [] - - # First filter - retrieval.process_metadata_filter_func(0, "contains", "author", "John", filters) - assert len(filters) == 1 - - # Second filter - retrieval.process_metadata_filter_func(1, ">", "year", 2020, filters) - assert len(filters) == 2 - - # Third filter - retrieval.process_metadata_filter_func(2, "is", "category", "tech", filters) - assert len(filters) == 3 - - def test_unknown_condition(self, retrieval): - """ - Test unknown/unsupported condition. - - Verifies: - - Original filters list is returned unchanged - - No filter is added for unknown conditions - """ - filters = [] - sequence = 0 - condition = "unknown_condition" - metadata_name = "author" - value = "test" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 0 - - def test_empty_string_value_with_contains(self, retrieval): - """ - Test empty string value with 'contains' condition. - - Verifies: - - Filter is added even with empty string - - LIKE expression is created - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "author" - value = "" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_special_characters_in_value(self, retrieval): - """ - Test special characters in value string. - - Verifies: - - Special characters are handled in value - - LIKE expression is created correctly - """ - filters = [] - sequence = 0 - condition = "contains" - metadata_name = "title" - value = "C++ & Python's features" - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_zero_value_with_numeric_condition(self, retrieval): - """ - Test zero value with numeric comparison condition. - - Verifies: - - Zero is treated as valid value - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = ">" - metadata_name = "price" - value = 0 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_negative_value_with_numeric_condition(self, retrieval): - """ - Test negative value with numeric comparison condition. - - Verifies: - - Negative numbers are handled correctly - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = "<" - metadata_name = "temperature" - value = -10.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 - - def test_float_value_with_integer_comparison(self, retrieval): - """ - Test float value with numeric comparison condition. - - Verifies: - - Float values work correctly - - Numeric comparison is performed - """ - filters = [] - sequence = 0 - condition = ">=" - metadata_name = "rating" - value = 4.5 - - result = retrieval.process_metadata_filter_func(sequence, condition, metadata_name, value, filters) - - assert result == filters - assert len(filters) == 1 diff --git a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py index 4bc802dc23..48782515d0 100644 --- a/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py +++ b/api/tests/unit_tests/core/rag/retrieval/test_dataset_retrieval_methods.py @@ -6,7 +6,7 @@ import pytest from core.rag.models.document import Document from core.rag.retrieval.dataset_retrieval import DatasetRetrieval from core.workflow.nodes.knowledge_retrieval import exc -from core.workflow.repositories.rag_retrieval_protocol import KnowledgeRetrievalRequest +from core.workflow.nodes.knowledge_retrieval.retrieval import KnowledgeRetrievalRequest from models.dataset import Dataset # ==================== Helper Functions ==================== diff --git a/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py b/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py deleted file mode 100644 index 5f461d53ae..0000000000 --- a/api/tests/unit_tests/core/rag/retrieval/test_knowledge_retrieval.py +++ /dev/null @@ -1,113 +0,0 @@ -import threading -from unittest.mock import Mock, patch -from uuid import uuid4 - -import pytest -from flask import Flask, current_app - -from core.rag.models.document import Document -from core.rag.retrieval.dataset_retrieval import DatasetRetrieval -from models.dataset import Dataset - - -class TestRetrievalService: - @pytest.fixture - def mock_dataset(self) -> Dataset: - dataset = Mock(spec=Dataset) - dataset.id = str(uuid4()) - dataset.tenant_id = str(uuid4()) - dataset.name = "test_dataset" - dataset.indexing_technique = "high_quality" - dataset.provider = "dify" - return dataset - - def test_multiple_retrieve_reranking_with_app_context(self, mock_dataset): - """ - Repro test for current bug: - reranking runs after `with flask_app.app_context():` exits. - `_multiple_retrieve_thread` catches exceptions and stores them into `thread_exceptions`, - so we must assert from that list (not from an outer try/except). - """ - dataset_retrieval = DatasetRetrieval() - flask_app = Flask(__name__) - tenant_id = str(uuid4()) - - # second dataset to ensure dataset_count > 1 reranking branch - secondary_dataset = Mock(spec=Dataset) - secondary_dataset.id = str(uuid4()) - secondary_dataset.provider = "dify" - secondary_dataset.indexing_technique = "high_quality" - - # retriever returns 1 doc into internal list (all_documents_item) - document = Document( - page_content="Context aware doc", - metadata={ - "doc_id": "doc1", - "score": 0.95, - "document_id": str(uuid4()), - "dataset_id": mock_dataset.id, - }, - provider="dify", - ) - - def fake_retriever( - flask_app, dataset_id, query, top_k, all_documents, document_ids_filter, metadata_condition, attachment_ids - ): - all_documents.append(document) - - called = {"init": 0, "invoke": 0} - - class ContextRequiredPostProcessor: - def __init__(self, *args, **kwargs): - called["init"] += 1 - # will raise RuntimeError if no Flask app context exists - _ = current_app.name - - def invoke(self, *args, **kwargs): - called["invoke"] += 1 - _ = current_app.name - return kwargs.get("documents") or args[1] - - # output list from _multiple_retrieve_thread - all_documents: list[Document] = [] - - # IMPORTANT: _multiple_retrieve_thread swallows exceptions and appends them here - thread_exceptions: list[Exception] = [] - - def target(): - with patch.object(dataset_retrieval, "_retriever", side_effect=fake_retriever): - with patch( - "core.rag.retrieval.dataset_retrieval.DataPostProcessor", - ContextRequiredPostProcessor, - ): - dataset_retrieval._multiple_retrieve_thread( - flask_app=flask_app, - available_datasets=[mock_dataset, secondary_dataset], - metadata_condition=None, - metadata_filter_document_ids=None, - all_documents=all_documents, - tenant_id=tenant_id, - reranking_enable=True, - reranking_mode="reranking_model", - reranking_model={ - "reranking_provider_name": "cohere", - "reranking_model_name": "rerank-v2", - }, - weights=None, - top_k=3, - score_threshold=0.0, - query="test query", - attachment_id=None, - dataset_count=2, # force reranking branch - thread_exceptions=thread_exceptions, # ✅ key - ) - - t = threading.Thread(target=target) - t.start() - t.join() - - # Ensure reranking branch was actually executed - assert called["init"] >= 1, "DataPostProcessor was never constructed; reranking branch may not have run." - - # Current buggy code should record an exception (not raise it) - assert not thread_exceptions, thread_exceptions diff --git a/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py new file mode 100644 index 0000000000..cfa9094e12 --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_function_call_router.py @@ -0,0 +1,100 @@ +from unittest.mock import Mock + +from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter +from dify_graph.model_runtime.entities.llm_entities import LLMUsage + + +class TestFunctionCallMultiDatasetRouter: + def test_invoke_returns_none_when_no_tools(self) -> None: + router = FunctionCallMultiDatasetRouter() + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[], + model_config=Mock(), + model_instance=Mock(), + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_single_tool_directly(self) -> None: + router = FunctionCallMultiDatasetRouter() + tool = Mock() + tool.name = "dataset-1" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool], + model_config=Mock(), + model_instance=Mock(), + ) + + assert dataset_id == "dataset-1" + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_tool_from_model_response(self) -> None: + router = FunctionCallMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + usage = LLMUsage.empty_usage() + response = Mock() + response.usage = usage + response.message.tool_calls = [Mock(function=Mock())] + response.message.tool_calls[0].function.name = "dataset-2" + model_instance = Mock() + model_instance.invoke_llm.return_value = response + + dataset_id, returned_usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id == "dataset-2" + assert returned_usage == usage + model_instance.invoke_llm.assert_called_once() + + def test_invoke_returns_none_when_no_tool_calls(self) -> None: + router = FunctionCallMultiDatasetRouter() + response = Mock() + response.usage = LLMUsage.empty_usage() + response.message.tool_calls = [] + model_instance = Mock() + model_instance.invoke_llm.return_value = response + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id is None + assert usage == response.usage + + def test_invoke_returns_empty_usage_when_model_raises(self) -> None: + router = FunctionCallMultiDatasetRouter() + model_instance = Mock() + model_instance.invoke_llm.side_effect = RuntimeError("boom") + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=model_instance, + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py new file mode 100644 index 0000000000..e429563739 --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_multi_dataset_react_route.py @@ -0,0 +1,252 @@ +from types import SimpleNamespace +from unittest.mock import Mock, patch + +from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish +from core.rag.retrieval.router.multi_dataset_react_route import ReactMultiDatasetRouter +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole + + +class TestReactMultiDatasetRouter: + def test_invoke_returns_none_when_no_tools(self) -> None: + router = ReactMultiDatasetRouter() + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_single_tool_directly(self) -> None: + router = ReactMultiDatasetRouter() + tool = Mock() + tool.name = "dataset-1" + + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id == "dataset-1" + assert usage == LLMUsage.empty_usage() + + def test_invoke_returns_tool_from_react_invoke(self) -> None: + router = ReactMultiDatasetRouter() + usage = LLMUsage.empty_usage() + tool_1 = Mock(name="dataset-1") + tool_1.name = "dataset-1" + tool_2 = Mock(name="dataset-2") + tool_2.name = "dataset-2" + + with patch.object(router, "_react_invoke", return_value=("dataset-2", usage)) as mock_react: + dataset_id, returned_usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + mock_react.assert_called_once() + assert dataset_id == "dataset-2" + assert returned_usage == usage + + def test_invoke_handles_react_invoke_errors(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_2 = Mock() + tool_2.name = "dataset-2" + + with patch.object(router, "_react_invoke", side_effect=RuntimeError("boom")): + dataset_id, usage = router.invoke( + query="python", + dataset_tools=[tool_1, tool_2], + model_config=Mock(), + model_instance=Mock(), + user_id="u1", + tenant_id="t1", + ) + + assert dataset_id is None + assert usage == LLMUsage.empty_usage() + + def test_react_invoke_returns_action_tool(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "chat" + model_config.parameters = {"temperature": 0.1} + usage = LLMUsage.empty_usage() + tools = [Mock(name="dataset-1"), Mock(name="dataset-2")] + tools[0].name = "dataset-1" + tools[0].description = "desc" + tools[1].name = "dataset-2" + tools[1].description = "desc" + + with ( + patch.object(router, "create_chat_prompt", return_value=[Mock()]) as mock_chat_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object(router, "_invoke_llm", return_value=('{"action":"dataset-2","action_input":{}}', usage)), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactAction("dataset-2", {}, "log") + + dataset_id, returned_usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=tools, + user_id="u1", + tenant_id="t1", + ) + + mock_chat_prompt.assert_called_once() + assert dataset_id == "dataset-2" + assert returned_usage == usage + + def test_react_invoke_returns_none_for_finish(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "completion" + model_config.parameters = {"temperature": 0.1} + usage = LLMUsage.empty_usage() + tool = Mock() + tool.name = "dataset-1" + tool.description = "desc" + + with ( + patch.object(router, "create_completion_prompt", return_value=Mock()) as mock_completion_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object( + router, "_invoke_llm", return_value=('{"action":"Final Answer","action_input":"done"}', usage) + ), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactFinish({"output": "done"}, "log") + + dataset_id, returned_usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=[tool], + user_id="u1", + tenant_id="t1", + ) + + mock_completion_prompt.assert_called_once() + assert dataset_id is None + assert returned_usage == usage + + def test_invoke_llm_and_handle_result(self) -> None: + router = ReactMultiDatasetRouter() + usage = LLMUsage.empty_usage() + delta = SimpleNamespace(message=SimpleNamespace(content="part"), usage=usage) + chunk = SimpleNamespace(model="m1", prompt_messages=[Mock()], delta=delta) + model_instance = Mock() + model_instance.invoke_llm.return_value = iter([chunk]) + + with patch("core.rag.retrieval.router.multi_dataset_react_route.deduct_llm_quota") as mock_deduct: + text, returned_usage = router._invoke_llm( + completion_param={"temperature": 0.1}, + model_instance=model_instance, + prompt_messages=[Mock()], + stop=["Observation:"], + user_id="u1", + tenant_id="t1", + ) + + assert text == "part" + assert returned_usage == usage + mock_deduct.assert_called_once() + + def test_handle_invoke_result_with_empty_usage(self) -> None: + router = ReactMultiDatasetRouter() + delta = SimpleNamespace(message=SimpleNamespace(content="part"), usage=None) + chunk = SimpleNamespace(model="m1", prompt_messages=[Mock()], delta=delta) + + text, usage = router._handle_invoke_result(iter([chunk])) + + assert text == "part" + assert usage == LLMUsage.empty_usage() + + def test_create_chat_prompt(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_1.description = "d1" + tool_2 = Mock() + tool_2.name = "dataset-2" + tool_2.description = "d2" + + chat_prompt = router.create_chat_prompt(query="python", tools=[tool_1, tool_2]) + assert len(chat_prompt) == 2 + assert chat_prompt[0].role == PromptMessageRole.SYSTEM + assert chat_prompt[1].role == PromptMessageRole.USER + assert "dataset-1" in chat_prompt[0].text + assert "dataset-2" in chat_prompt[0].text + + def test_create_completion_prompt(self) -> None: + router = ReactMultiDatasetRouter() + tool_1 = Mock() + tool_1.name = "dataset-1" + tool_1.description = "d1" + tool_2 = Mock() + tool_2.name = "dataset-2" + tool_2.description = "d2" + + completion_prompt = router.create_completion_prompt(tools=[tool_1, tool_2]) + assert "dataset-1: d1" in completion_prompt.text + assert "dataset-2: d2" in completion_prompt.text + + def test_react_invoke_uses_completion_branch_for_non_chat_mode(self) -> None: + router = ReactMultiDatasetRouter() + model_config = Mock() + model_config.mode = "unknown-mode" + model_config.parameters = {} + tool = Mock() + tool.name = "dataset-1" + tool.description = "desc" + + with ( + patch.object(router, "create_completion_prompt", return_value=Mock()) as mock_completion_prompt, + patch( + "core.rag.retrieval.router.multi_dataset_react_route.AdvancedPromptTransform" + ) as mock_prompt_transform, + patch.object( + router, + "_invoke_llm", + return_value=('{"action":"Final Answer","action_input":"done"}', LLMUsage.empty_usage()), + ), + patch("core.rag.retrieval.router.multi_dataset_react_route.StructuredChatOutputParser") as mock_parser_cls, + ): + mock_prompt_transform.return_value.get_prompt.return_value = [Mock()] + mock_parser_cls.return_value.parse.return_value = ReactFinish({"output": "done"}, "log") + dataset_id, usage = router._react_invoke( + query="python", + model_config=model_config, + model_instance=Mock(), + tools=[tool], + user_id="u1", + tenant_id="t1", + ) + + mock_completion_prompt.assert_called_once() + assert dataset_id is None + assert usage == LLMUsage.empty_usage() diff --git a/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py b/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py new file mode 100644 index 0000000000..c8fa0ea62f --- /dev/null +++ b/api/tests/unit_tests/core/rag/retrieval/test_structured_chat_output_parser.py @@ -0,0 +1,69 @@ +import pytest + +from core.rag.retrieval.output_parser.react_output import ReactAction, ReactFinish +from core.rag.retrieval.output_parser.structured_chat import StructuredChatOutputParser + + +class TestStructuredChatOutputParser: + def test_parse_action_without_action_input(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"action":"some_action"}\n```' + result = parser.parse(text) + + assert isinstance(result, ReactAction) + assert result.tool == "some_action" + assert result.tool_input == {} + + def test_parse_json_without_action_key(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"not_action":"search"}\n```' + with pytest.raises(ValueError, match="Could not parse LLM output"): + parser.parse(text) + + def test_parse_returns_action_for_tool_call(self) -> None: + parser = StructuredChatOutputParser() + text = ( + 'Thought: call tool\nAction:\n```json\n{"action":"search_dataset","action_input":{"query":"python"}}\n```' + ) + + result = parser.parse(text) + + assert isinstance(result, ReactAction) + assert result.tool == "search_dataset" + assert result.tool_input == {"query": "python"} + assert result.log == text + + def test_parse_returns_finish_for_final_answer(self) -> None: + parser = StructuredChatOutputParser() + text = 'Thought: done\nAction:\n```json\n{"action":"Final Answer","action_input":"final text"}\n```' + + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": "final text"} + assert result.log == text + + def test_parse_returns_finish_for_json_array_payload(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n[{"action":"search","action_input":"hello"}]\n```' + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": text} + assert result.log == text + + def test_parse_returns_finish_for_plain_text(self) -> None: + parser = StructuredChatOutputParser() + text = "No structured action block" + + result = parser.parse(text) + + assert isinstance(result, ReactFinish) + assert result.return_values == {"output": text} + + def test_parse_raises_value_error_for_invalid_json(self) -> None: + parser = StructuredChatOutputParser() + text = 'Action:\n```json\n{"action":"search","action_input": }\n```' + + with pytest.raises(ValueError, match="Could not parse LLM output"): + parser.parse(text) diff --git a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py index 943a9e5712..976de10d89 100644 --- a/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py +++ b/api/tests/unit_tests/core/rag/splitter/test_text_splitter.py @@ -125,7 +125,11 @@ Run with coverage: - Tests are organized by functionality in classes for better organization """ +import asyncio import string +import sys +import types +from inspect import currentframe from unittest.mock import Mock, patch import pytest @@ -604,6 +608,51 @@ class TestRecursiveCharacterTextSplitter: assert "def hello_world" in combined or "hello_world" in combined +class TestTextSplitterBasePaths: + """Target uncovered base TextSplitter paths.""" + + def test_from_huggingface_tokenizer_success_path(self): + """Cover from_huggingface_tokenizer success branch with mocked transformers.""" + + class _FakePreTrainedTokenizerBase: + pass + + class _FakeTokenizer(_FakePreTrainedTokenizerBase): + def encode(self, text: str): + return [ord(c) for c in text] + + fake_transformers = types.SimpleNamespace(PreTrainedTokenizerBase=_FakePreTrainedTokenizerBase) + with patch.dict(sys.modules, {"transformers": fake_transformers}): + splitter = RecursiveCharacterTextSplitter.from_huggingface_tokenizer( + tokenizer=_FakeTokenizer(), + chunk_size=5, + chunk_overlap=1, + ) + + chunks = splitter.split_text("abcdef") + assert chunks + + def test_from_huggingface_tokenizer_import_error(self): + """Cover from_huggingface_tokenizer import-error branch.""" + with patch.dict(sys.modules, {"transformers": None}): + with pytest.raises(ValueError, match="Could not import transformers"): + RecursiveCharacterTextSplitter.from_huggingface_tokenizer(tokenizer=object(), chunk_size=5) + + def test_atransform_documents_raises_not_implemented(self): + """Cover atransform_documents NotImplemented branch.""" + splitter = RecursiveCharacterTextSplitter(chunk_size=20, chunk_overlap=5) + with pytest.raises(NotImplementedError): + asyncio.run(splitter.atransform_documents([Document(page_content="x", metadata={})])) + + def test_merge_splits_logs_warning_for_oversized_total(self): + """Cover logger.warning path in _merge_splits.""" + splitter = RecursiveCharacterTextSplitter(chunk_size=5, chunk_overlap=1) + with patch("core.rag.splitter.text_splitter.logger.warning") as mock_warning: + merged = splitter._merge_splits(["abcdefghij", "b"], "", [10, 1]) + assert merged + mock_warning.assert_called_once() + + # ============================================================================ # Test TokenTextSplitter # ============================================================================ @@ -662,6 +711,44 @@ class TestTokenTextSplitter: except ImportError: pytest.skip("tiktoken not installed") + def test_initialization_and_split_with_mocked_tiktoken_encoding(self): + """Cover TokenTextSplitter __init__ else-path and split_text logic.""" + + class _FakeEncoding: + def encode(self, text: str, allowed_special=None, disallowed_special=None): + return [ord(c) for c in text] + + def decode(self, token_ids: list[int]) -> str: + return "".join(chr(i) for i in token_ids) + + fake_tiktoken = types.SimpleNamespace(get_encoding=lambda name: _FakeEncoding()) + with patch.dict(sys.modules, {"tiktoken": fake_tiktoken}): + splitter = TokenTextSplitter(encoding_name="gpt2", chunk_size=4, chunk_overlap=1) + result = splitter.split_text("abcdefgh") + + assert result + assert all(isinstance(chunk, str) for chunk in result) + + def test_initialization_with_model_name_uses_encoding_for_model(self): + """Cover TokenTextSplitter model_name init branch.""" + + class _FakeEncoding: + def encode(self, text: str, allowed_special=None, disallowed_special=None): + return [ord(c) for c in text] + + def decode(self, token_ids: list[int]) -> str: + return "".join(chr(i) for i in token_ids) + + fake_encoding = _FakeEncoding() + fake_tiktoken = types.SimpleNamespace( + encoding_for_model=lambda model_name: fake_encoding, + get_encoding=lambda name: _FakeEncoding(), + ) + with patch.dict(sys.modules, {"tiktoken": fake_tiktoken}): + splitter = TokenTextSplitter(model_name="gpt-4", chunk_size=5, chunk_overlap=1) + + assert splitter._tokenizer is fake_encoding + # ============================================================================ # Test EnhanceRecursiveCharacterTextSplitter @@ -731,6 +818,50 @@ class TestEnhanceRecursiveCharacterTextSplitter: assert len(result) > 0 assert all(isinstance(chunk, str) for chunk in result) + def test_from_encoder_internal_token_encoder_paths(self): + """ + Test internal _token_encoder branches by capturing local closure from frame. + + This validates: + - empty texts path + - embedding model path + - GPT2Tokenizer fallback path + - _character_encoder empty-path branch + """ + + class _SpySplitter(EnhanceRecursiveCharacterTextSplitter): + captured_token_encoder = None + captured_character_encoder = None + + def __init__(self, **kwargs): + frame = currentframe() + if frame and frame.f_back: + _SpySplitter.captured_token_encoder = frame.f_back.f_locals.get("_token_encoder") + _SpySplitter.captured_character_encoder = frame.f_back.f_locals.get("_character_encoder") + super().__init__(**kwargs) + + mock_model = Mock() + mock_model.get_text_embedding_num_tokens.return_value = [3, 5] + + _SpySplitter.from_encoder(embedding_model_instance=mock_model, chunk_size=10, chunk_overlap=1) + token_encoder = _SpySplitter.captured_token_encoder + character_encoder = _SpySplitter.captured_character_encoder + + assert token_encoder is not None + assert character_encoder is not None + assert token_encoder([]) == [] + assert token_encoder(["abc", "defgh"]) == [3, 5] + assert character_encoder([]) == [] + + with patch( + "core.rag.splitter.fixed_text_splitter.GPT2Tokenizer.get_num_tokens", + side_effect=lambda text: len(text) + 1, + ): + _SpySplitter.from_encoder(embedding_model_instance=None, chunk_size=10, chunk_overlap=1) + token_encoder_without_model = _SpySplitter.captured_token_encoder + assert token_encoder_without_model is not None + assert token_encoder_without_model(["ab", "cdef"]) == [3, 5] + # ============================================================================ # Test FixedRecursiveCharacterTextSplitter @@ -908,6 +1039,56 @@ class TestFixedRecursiveCharacterTextSplitter: chunks = splitter.split_text(data) assert chunks == ["chunk 1\n\nsubchunk 1.\nsubchunk 2.", "chunk 2\n\nsubchunk 1\nsubchunk 2."] + def test_recursive_split_keep_separator_and_recursive_fallback(self): + """Cover keep-separator split branch and recursive _split_text fallback.""" + text = "short." + ("x" * 60) + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=[".", " ", ""], + chunk_size=10, + chunk_overlap=2, + keep_separator=True, + ) + + chunks = splitter.recursive_split_text(text) + + assert chunks + assert any("short." in chunk for chunk in chunks) + assert any(len(chunk) <= 12 for chunk in chunks) + + def test_recursive_split_newline_separator_filtering(self): + """Cover newline-specific empty filtering branch.""" + text = "line1\n\nline2\n\nline3" + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=["\n", ""], + chunk_size=50, + chunk_overlap=5, + ) + + chunks = splitter.recursive_split_text(text) + + assert chunks + assert all(chunk != "" for chunk in chunks) + assert "line1" in "".join(chunks) + assert "line2" in "".join(chunks) + assert "line3" in "".join(chunks) + + def test_recursive_split_without_new_separator_appends_long_chunk(self): + """Cover branch where no further separators exist and long split is appended directly.""" + text = "aa\n" + ("b" * 40) + splitter = FixedRecursiveCharacterTextSplitter( + fixed_separator="", + separators=["\n"], + chunk_size=10, + chunk_overlap=2, + ) + + chunks = splitter.recursive_split_text(text) + + assert "aa" in chunks + assert any(len(chunk) >= 40 for chunk in chunks) + # ============================================================================ # Test Metadata Preservation diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py index e6d0371cd5..e7eecfa297 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_execution_repository.py @@ -11,7 +11,7 @@ from uuid import uuid4 import pytest from core.repositories.celery_workflow_execution_repository import CeleryWorkflowExecutionRepository -from core.workflow.entities.workflow_execution import WorkflowExecution, WorkflowType +from dify_graph.entities.workflow_execution import WorkflowExecution, WorkflowType from libs.datetime_utils import naive_utc_now from models import Account, EndUser from models.enums import WorkflowRunTriggeredFrom diff --git a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py index f6211f4cca..2a83a4e802 100644 --- a/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py +++ b/api/tests/unit_tests/core/repositories/test_celery_workflow_node_execution_repository.py @@ -11,12 +11,12 @@ from uuid import uuid4 import pytest from core.repositories.celery_workflow_node_execution_repository import CeleryWorkflowNodeExecutionRepository -from core.workflow.entities.workflow_node_execution import ( +from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from core.workflow.enums import NodeType -from core.workflow.repositories.workflow_node_execution_repository import OrderConfig +from dify_graph.enums import BuiltinNodeTypes +from dify_graph.repositories.workflow_node_execution_repository import OrderConfig from libs.datetime_utils import naive_utc_now from models import Account, EndUser from models.workflow import WorkflowNodeExecutionTriggeredFrom @@ -61,7 +61,7 @@ def sample_workflow_node_execution(): workflow_execution_id=str(uuid4()), index=1, node_id="test_node", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -259,7 +259,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 1", inputs={"input1": "value1"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -272,7 +272,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 2", inputs={"input2": "value2"}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -310,7 +310,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=2, node_id="node2", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Node 2", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, @@ -323,7 +323,7 @@ class TestCeleryWorkflowNodeExecutionRepository: workflow_execution_id=workflow_run_id, index=1, node_id="node1", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Node 1", inputs={}, status=WorkflowNodeExecutionStatus.RUNNING, diff --git a/api/tests/unit_tests/core/repositories/test_factory.py b/api/tests/unit_tests/core/repositories/test_factory.py index 7f1e2c5e5b..fe9eed0307 100644 --- a/api/tests/unit_tests/core/repositories/test_factory.py +++ b/api/tests/unit_tests/core/repositories/test_factory.py @@ -12,8 +12,8 @@ from sqlalchemy.engine import Engine from sqlalchemy.orm import sessionmaker from core.repositories.factory import DifyCoreRepositoryFactory, RepositoryImportError -from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository -from core.workflow.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository +from dify_graph.repositories.workflow_execution_repository import WorkflowExecutionRepository +from dify_graph.repositories.workflow_node_execution_repository import WorkflowNodeExecutionRepository from libs.module_loading import import_string from models import Account, EndUser from models.enums import WorkflowRunTriggeredFrom diff --git a/api/tests/unit_tests/core/repositories/test_human_input_form_repository_impl.py b/api/tests/unit_tests/core/repositories/test_human_input_form_repository_impl.py index 811ed2143b..9af4d12664 100644 --- a/api/tests/unit_tests/core/repositories/test_human_input_form_repository_impl.py +++ b/api/tests/unit_tests/core/repositories/test_human_input_form_repository_impl.py @@ -5,7 +5,6 @@ from __future__ import annotations import dataclasses from datetime import datetime from types import SimpleNamespace -from unittest.mock import MagicMock import pytest @@ -15,7 +14,7 @@ from core.repositories.human_input_repository import ( HumanInputFormSubmissionRepository, _WorkspaceMemberInfo, ) -from core.workflow.nodes.human_input.entities import ( +from dify_graph.nodes.human_input.entities import ( EmailDeliveryConfig, EmailDeliveryMethod, EmailRecipients, @@ -24,7 +23,7 @@ from core.workflow.nodes.human_input.entities import ( MemberRecipient, UserAction, ) -from core.workflow.nodes.human_input.enums import HumanInputFormKind, HumanInputFormStatus +from dify_graph.nodes.human_input.enums import HumanInputFormKind, HumanInputFormStatus from libs.datetime_utils import naive_utc_now from models.human_input import ( EmailExternalRecipientPayload, @@ -35,7 +34,7 @@ from models.human_input import ( def _build_repository() -> HumanInputFormRepositoryImpl: - return HumanInputFormRepositoryImpl(session_factory=MagicMock(), tenant_id="tenant-id") + return HumanInputFormRepositoryImpl(tenant_id="tenant-id") def _patch_recipient_factory(monkeypatch: pytest.MonkeyPatch) -> list[SimpleNamespace]: @@ -389,8 +388,21 @@ def _session_factory(session: _FakeSession): return _factory +def _patch_repo_session_factory(monkeypatch: pytest.MonkeyPatch, session: _FakeSession) -> None: + """Patch repository's global session factory to return our fake session. + + The repositories under test now use a global session factory; patch its + create_session method so unit tests don't hit a real database. + """ + monkeypatch.setattr( + "core.repositories.human_input_repository.session_factory.create_session", + _session_factory(session), + raising=True, + ) + + class TestHumanInputFormRepositoryImplPublicMethods: - def test_get_form_returns_entity_and_recipients(self): + def test_get_form_returns_entity_and_recipients(self, monkeypatch: pytest.MonkeyPatch): form = _DummyForm( id="form-1", workflow_run_id="run-1", @@ -408,7 +420,8 @@ class TestHumanInputFormRepositoryImplPublicMethods: access_token="token-123", ) session = _FakeSession(scalars_results=[form, [recipient]]) - repo = HumanInputFormRepositoryImpl(_session_factory(session), tenant_id="tenant-id") + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant-id") entity = repo.get_form(form.workflow_run_id, form.node_id) @@ -418,13 +431,14 @@ class TestHumanInputFormRepositoryImplPublicMethods: assert len(entity.recipients) == 1 assert entity.recipients[0].token == "token-123" - def test_get_form_returns_none_when_missing(self): + def test_get_form_returns_none_when_missing(self, monkeypatch: pytest.MonkeyPatch): session = _FakeSession(scalars_results=[None]) - repo = HumanInputFormRepositoryImpl(_session_factory(session), tenant_id="tenant-id") + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant-id") assert repo.get_form("run-1", "node-1") is None - def test_get_form_returns_unsubmitted_state(self): + def test_get_form_returns_unsubmitted_state(self, monkeypatch: pytest.MonkeyPatch): form = _DummyForm( id="form-1", workflow_run_id="run-1", @@ -436,7 +450,8 @@ class TestHumanInputFormRepositoryImplPublicMethods: expiration_time=naive_utc_now(), ) session = _FakeSession(scalars_results=[form, []]) - repo = HumanInputFormRepositoryImpl(_session_factory(session), tenant_id="tenant-id") + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant-id") entity = repo.get_form(form.workflow_run_id, form.node_id) @@ -445,7 +460,7 @@ class TestHumanInputFormRepositoryImplPublicMethods: assert entity.selected_action_id is None assert entity.submitted_data is None - def test_get_form_returns_submission_when_completed(self): + def test_get_form_returns_submission_when_completed(self, monkeypatch: pytest.MonkeyPatch): form = _DummyForm( id="form-1", workflow_run_id="run-1", @@ -460,7 +475,8 @@ class TestHumanInputFormRepositoryImplPublicMethods: submitted_at=naive_utc_now(), ) session = _FakeSession(scalars_results=[form, []]) - repo = HumanInputFormRepositoryImpl(_session_factory(session), tenant_id="tenant-id") + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant-id") entity = repo.get_form(form.workflow_run_id, form.node_id) @@ -471,7 +487,7 @@ class TestHumanInputFormRepositoryImplPublicMethods: class TestHumanInputFormSubmissionRepository: - def test_get_by_token_returns_record(self): + def test_get_by_token_returns_record(self, monkeypatch: pytest.MonkeyPatch): form = _DummyForm( id="form-1", workflow_run_id="run-1", @@ -490,7 +506,8 @@ class TestHumanInputFormSubmissionRepository: form=form, ) session = _FakeSession(scalars_result=recipient) - repo = HumanInputFormSubmissionRepository(_session_factory(session)) + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() record = repo.get_by_token("token-123") @@ -499,7 +516,7 @@ class TestHumanInputFormSubmissionRepository: assert record.recipient_type == RecipientType.STANDALONE_WEB_APP assert record.submitted is False - def test_get_by_form_id_and_recipient_type_uses_recipient(self): + def test_get_by_form_id_and_recipient_type_uses_recipient(self, monkeypatch: pytest.MonkeyPatch): form = _DummyForm( id="form-1", workflow_run_id="run-1", @@ -518,7 +535,8 @@ class TestHumanInputFormSubmissionRepository: form=form, ) session = _FakeSession(scalars_result=recipient) - repo = HumanInputFormSubmissionRepository(_session_factory(session)) + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() record = repo.get_by_form_id_and_recipient_type( form_id=form.id, @@ -553,7 +571,8 @@ class TestHumanInputFormSubmissionRepository: forms={form.id: form}, recipients={recipient.id: recipient}, ) - repo = HumanInputFormSubmissionRepository(_session_factory(session)) + _patch_repo_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() record: HumanInputFormRecord = repo.mark_submitted( form_id=form.id, diff --git a/api/tests/unit_tests/core/repositories/test_human_input_repository.py b/api/tests/unit_tests/core/repositories/test_human_input_repository.py new file mode 100644 index 0000000000..4116e8b4a5 --- /dev/null +++ b/api/tests/unit_tests/core/repositories/test_human_input_repository.py @@ -0,0 +1,677 @@ +from __future__ import annotations + +import dataclasses +import json +from collections.abc import Sequence +from datetime import datetime, timedelta +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock + +import pytest + +from core.repositories.human_input_repository import ( + HumanInputFormRecord, + HumanInputFormRepositoryImpl, + HumanInputFormSubmissionRepository, + _HumanInputFormEntityImpl, + _HumanInputFormRecipientEntityImpl, + _InvalidTimeoutStatusError, + _WorkspaceMemberInfo, +) +from dify_graph.nodes.human_input.entities import ( + EmailDeliveryConfig, + EmailDeliveryMethod, + EmailRecipients, + ExternalRecipient, + HumanInputNodeData, + MemberRecipient, + UserAction, + WebAppDeliveryMethod, +) +from dify_graph.nodes.human_input.enums import HumanInputFormKind, HumanInputFormStatus +from dify_graph.repositories.human_input_form_repository import FormCreateParams, FormNotFoundError +from libs.datetime_utils import naive_utc_now +from models.human_input import HumanInputFormRecipient, RecipientType + + +@pytest.fixture(autouse=True) +def _stub_select(monkeypatch: pytest.MonkeyPatch) -> None: + class _FakeSelect: + def join(self, *_args: Any, **_kwargs: Any) -> _FakeSelect: + return self + + def where(self, *_args: Any, **_kwargs: Any) -> _FakeSelect: + return self + + def options(self, *_args: Any, **_kwargs: Any) -> _FakeSelect: + return self + + monkeypatch.setattr("core.repositories.human_input_repository.select", lambda *_args, **_kwargs: _FakeSelect()) + monkeypatch.setattr("core.repositories.human_input_repository.selectinload", lambda *_args, **_kwargs: "_loader") + + +def _make_form_definition_json(*, include_expiration_time: bool) -> str: + payload: dict[str, Any] = { + "form_content": "hi", + "inputs": [], + "user_actions": [{"id": "submit", "title": "Submit"}], + "rendered_content": "hi
", + } + if include_expiration_time: + payload["expiration_time"] = naive_utc_now() + return json.dumps(payload, default=str) + + +@dataclasses.dataclass +class _DummyForm: + id: str + workflow_run_id: str | None + node_id: str + tenant_id: str + app_id: str + form_definition: str + rendered_content: str + expiration_time: datetime + form_kind: HumanInputFormKind = HumanInputFormKind.RUNTIME + created_at: datetime = dataclasses.field(default_factory=naive_utc_now) + selected_action_id: str | None = None + submitted_data: str | None = None + submitted_at: datetime | None = None + submission_user_id: str | None = None + submission_end_user_id: str | None = None + completed_by_recipient_id: str | None = None + status: HumanInputFormStatus = HumanInputFormStatus.WAITING + + +@dataclasses.dataclass +class _DummyRecipient: + id: str + form_id: str + recipient_type: RecipientType + access_token: str | None + + +class _FakeScalarResult: + def __init__(self, obj: Any): + self._obj = obj + + def first(self) -> Any: + if isinstance(self._obj, list): + return self._obj[0] if self._obj else None + return self._obj + + def all(self) -> list[Any]: + if self._obj is None: + return [] + if isinstance(self._obj, list): + return list(self._obj) + return [self._obj] + + +class _FakeExecuteResult: + def __init__(self, rows: Sequence[tuple[Any, ...]]): + self._rows = list(rows) + + def all(self) -> list[tuple[Any, ...]]: + return list(self._rows) + + +class _FakeSession: + def __init__( + self, + *, + scalars_result: Any = None, + scalars_results: list[Any] | None = None, + forms: dict[str, _DummyForm] | None = None, + recipients: dict[str, _DummyRecipient] | None = None, + execute_rows: Sequence[tuple[Any, ...]] = (), + ): + if scalars_results is not None: + self._scalars_queue = list(scalars_results) + else: + self._scalars_queue = [scalars_result] + self._forms = forms or {} + self._recipients = recipients or {} + self._execute_rows = list(execute_rows) + self.added: list[Any] = [] + + def scalars(self, _query: Any) -> _FakeScalarResult: + if self._scalars_queue: + value = self._scalars_queue.pop(0) + else: + value = None + return _FakeScalarResult(value) + + def execute(self, _stmt: Any) -> _FakeExecuteResult: + return _FakeExecuteResult(self._execute_rows) + + def get(self, model_cls: Any, obj_id: str) -> Any: + name = getattr(model_cls, "__name__", "") + if name == "HumanInputForm": + return self._forms.get(obj_id) + if name == "HumanInputFormRecipient": + return self._recipients.get(obj_id) + return None + + def add(self, obj: Any) -> None: + self.added.append(obj) + + def add_all(self, objs: Sequence[Any]) -> None: + self.added.extend(list(objs)) + + def flush(self) -> None: + # Simulate DB default population for attributes referenced in entity wrappers. + for obj in self.added: + if hasattr(obj, "id") and obj.id in (None, ""): + obj.id = f"gen-{len(str(self.added))}" + if isinstance(obj, HumanInputFormRecipient) and obj.access_token is None: + if obj.recipient_type == RecipientType.CONSOLE: + obj.access_token = "token-console" + elif obj.recipient_type == RecipientType.BACKSTAGE: + obj.access_token = "token-backstage" + else: + obj.access_token = "token-webapp" + + def refresh(self, _obj: Any) -> None: + return None + + def begin(self) -> _FakeSession: + return self + + def __enter__(self) -> _FakeSession: + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + +class _SessionFactoryStub: + def __init__(self, session: _FakeSession): + self._session = session + + def create_session(self) -> _FakeSession: + return self._session + + +def _patch_session_factory(monkeypatch: pytest.MonkeyPatch, session: _FakeSession) -> None: + monkeypatch.setattr("core.repositories.human_input_repository.session_factory", _SessionFactoryStub(session)) + + +def test_recipient_entity_token_raises_when_missing() -> None: + recipient = SimpleNamespace(id="r1", access_token=None) + entity = _HumanInputFormRecipientEntityImpl(recipient) # type: ignore[arg-type] + with pytest.raises(AssertionError, match="access_token should not be None"): + _ = entity.token + + +def test_recipient_entity_id_and_token_success() -> None: + recipient = SimpleNamespace(id="r1", access_token="tok") + entity = _HumanInputFormRecipientEntityImpl(recipient) # type: ignore[arg-type] + assert entity.id == "r1" + assert entity.token == "tok" + + +def test_form_entity_web_app_token_prefers_console_then_webapp_then_none() -> None: + form = _DummyForm( + id="f1", + workflow_run_id="run", + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + ) + console = _DummyRecipient(id="c1", form_id=form.id, recipient_type=RecipientType.CONSOLE, access_token="ctok") + webapp = _DummyRecipient( + id="w1", form_id=form.id, recipient_type=RecipientType.STANDALONE_WEB_APP, access_token="wtok" + ) + + entity = _HumanInputFormEntityImpl(form_model=form, recipient_models=[webapp, console]) # type: ignore[arg-type] + assert entity.web_app_token == "ctok" + + entity = _HumanInputFormEntityImpl(form_model=form, recipient_models=[webapp]) # type: ignore[arg-type] + assert entity.web_app_token == "wtok" + + entity = _HumanInputFormEntityImpl(form_model=form, recipient_models=[]) # type: ignore[arg-type] + assert entity.web_app_token is None + + +def test_form_entity_submitted_data_parsed() -> None: + form = _DummyForm( + id="f1", + workflow_run_id="run", + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + submitted_data='{"a": 1}', + submitted_at=naive_utc_now(), + ) + entity = _HumanInputFormEntityImpl(form_model=form, recipient_models=[]) # type: ignore[arg-type] + assert entity.submitted is True + assert entity.submitted_data == {"a": 1} + assert entity.rendered_content == "x
" + assert entity.selected_action_id is None + assert entity.status == HumanInputFormStatus.WAITING + + +def test_form_record_from_models_injects_expiration_time_when_missing() -> None: + expiration = naive_utc_now() + form = _DummyForm( + id="f1", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=False), + rendered_content="x
", + expiration_time=expiration, + submitted_data='{"k": "v"}', + ) + record = HumanInputFormRecord.from_models(form, None) # type: ignore[arg-type] + assert record.definition.expiration_time == expiration + assert record.submitted_data == {"k": "v"} + assert record.submitted is False + + +def test_create_email_recipients_from_resolved_dedupes_and_skips_blank(monkeypatch: pytest.MonkeyPatch) -> None: + created: list[SimpleNamespace] = [] + + def fake_new(cls, form_id: str, delivery_id: str, payload: Any): # type: ignore[no-untyped-def] + recipient = SimpleNamespace( + id=f"{payload.TYPE}-{len(created)}", + form_id=form_id, + delivery_id=delivery_id, + recipient_type=payload.TYPE, + recipient_payload=payload.model_dump_json(), + access_token="tok", + ) + created.append(recipient) + return recipient + + monkeypatch.setattr("core.repositories.human_input_repository.HumanInputFormRecipient.new", classmethod(fake_new)) + + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + recipients = repo._create_email_recipients_from_resolved( # type: ignore[attr-defined] + form_id="f", + delivery_id="d", + members=[ + _WorkspaceMemberInfo(user_id="u1", email=""), + _WorkspaceMemberInfo(user_id="u2", email="a@example.com"), + _WorkspaceMemberInfo(user_id="u3", email="a@example.com"), + ], + external_emails=["", "a@example.com", "b@example.com", "b@example.com"], + ) + assert [r.recipient_type for r in recipients] == [RecipientType.EMAIL_MEMBER, RecipientType.EMAIL_EXTERNAL] + + +def test_query_workspace_members_by_ids_empty_returns_empty() -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + assert repo._query_workspace_members_by_ids(session=MagicMock(), restrict_to_user_ids=["", ""]) == [] + + +def test_query_workspace_members_by_ids_maps_rows() -> None: + session = _FakeSession(execute_rows=[("u1", "a@example.com"), ("u2", "b@example.com")]) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + rows = repo._query_workspace_members_by_ids(session=session, restrict_to_user_ids=["u1", "u2"]) + assert rows == [ + _WorkspaceMemberInfo(user_id="u1", email="a@example.com"), + _WorkspaceMemberInfo(user_id="u2", email="b@example.com"), + ] + + +def test_query_all_workspace_members_maps_rows() -> None: + session = _FakeSession(execute_rows=[("u1", "a@example.com")]) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + rows = repo._query_all_workspace_members(session=session) + assert rows == [_WorkspaceMemberInfo(user_id="u1", email="a@example.com")] + + +def test_repository_init_sets_tenant_id() -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + assert repo._tenant_id == "tenant" + + +def test_delivery_method_to_model_webapp_creates_delivery_and_recipient(monkeypatch: pytest.MonkeyPatch) -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + monkeypatch.setattr("core.repositories.human_input_repository.uuidv7", lambda: "del-1") + result = repo._delivery_method_to_model( + session=MagicMock(), form_id="form-1", delivery_method=WebAppDeliveryMethod() + ) + assert result.delivery.id == "del-1" + assert result.delivery.form_id == "form-1" + assert len(result.recipients) == 1 + assert result.recipients[0].recipient_type == RecipientType.STANDALONE_WEB_APP + + +def test_delivery_method_to_model_email_uses_build_email_recipients(monkeypatch: pytest.MonkeyPatch) -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + monkeypatch.setattr("core.repositories.human_input_repository.uuidv7", lambda: "del-1") + called: dict[str, Any] = {} + + def fake_build(*, session: Any, form_id: str, delivery_id: str, recipients_config: Any) -> list[Any]: + called.update( + {"session": session, "form_id": form_id, "delivery_id": delivery_id, "recipients_config": recipients_config} + ) + return ["r"] + + monkeypatch.setattr(repo, "_build_email_recipients", fake_build) + + method = EmailDeliveryMethod( + config=EmailDeliveryConfig( + recipients=EmailRecipients( + whole_workspace=False, + items=[MemberRecipient(user_id="u1"), ExternalRecipient(email="e@example.com")], + ), + subject="s", + body="b", + ) + ) + result = repo._delivery_method_to_model(session="sess", form_id="form-1", delivery_method=method) + assert result.recipients == ["r"] + assert called["delivery_id"] == "del-1" + + +def test_build_email_recipients_uses_all_members_when_whole_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + monkeypatch.setattr( + repo, + "_query_all_workspace_members", + lambda *, session: [_WorkspaceMemberInfo(user_id="u", email="a@example.com")], + ) + monkeypatch.setattr(repo, "_create_email_recipients_from_resolved", lambda **_: ["ok"]) + recipients = repo._build_email_recipients( + session=MagicMock(), + form_id="f", + delivery_id="d", + recipients_config=EmailRecipients(whole_workspace=True, items=[ExternalRecipient(email="e@example.com")]), + ) + assert recipients == ["ok"] + + +def test_build_email_recipients_uses_selected_members_when_not_whole_workspace(monkeypatch: pytest.MonkeyPatch) -> None: + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + + def fake_query(*, session: Any, restrict_to_user_ids: Sequence[str]) -> list[_WorkspaceMemberInfo]: + assert restrict_to_user_ids == ["u1"] + return [_WorkspaceMemberInfo(user_id="u1", email="a@example.com")] + + monkeypatch.setattr(repo, "_query_workspace_members_by_ids", fake_query) + monkeypatch.setattr(repo, "_create_email_recipients_from_resolved", lambda **_: ["ok"]) + recipients = repo._build_email_recipients( + session=MagicMock(), + form_id="f", + delivery_id="d", + recipients_config=EmailRecipients( + whole_workspace=False, + items=[MemberRecipient(user_id="u1"), ExternalRecipient(email="e@example.com")], + ), + ) + assert recipients == ["ok"] + + +def test_get_form_returns_entity_and_none_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_session_factory(monkeypatch, _FakeSession(scalars_results=[None])) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + assert repo.get_form("run", "node") is None + + form = _DummyForm( + id="f1", + workflow_run_id="run", + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + ) + recipient = _DummyRecipient( + id="r1", + form_id=form.id, + recipient_type=RecipientType.STANDALONE_WEB_APP, + access_token="tok", + ) + session = _FakeSession(scalars_results=[form, [recipient]]) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + entity = repo.get_form("run", "node") + assert entity is not None + assert entity.id == "f1" + assert entity.recipients[0].id == "r1" + assert entity.recipients[0].token == "tok" + + +def test_create_form_adds_console_and_backstage_recipients(monkeypatch: pytest.MonkeyPatch) -> None: + fixed_now = datetime(2024, 1, 1, 0, 0, 0) + monkeypatch.setattr("core.repositories.human_input_repository.naive_utc_now", lambda: fixed_now) + + ids = iter(["form-id", "del-web", "del-console", "del-backstage"]) + monkeypatch.setattr("core.repositories.human_input_repository.uuidv7", lambda: next(ids)) + + session = _FakeSession() + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormRepositoryImpl(tenant_id="tenant") + + form_config = HumanInputNodeData( + title="Title", + delivery_methods=[], + form_content="hello", + inputs=[], + user_actions=[UserAction(id="submit", title="Submit")], + ) + params = FormCreateParams( + app_id="app", + workflow_execution_id="run", + node_id="node", + form_config=form_config, + rendered_content="hello
", + delivery_methods=[WebAppDeliveryMethod()], + display_in_ui=True, + resolved_default_values={}, + form_kind=HumanInputFormKind.RUNTIME, + console_recipient_required=True, + console_creator_account_id="acc-1", + backstage_recipient_required=True, + ) + + entity = repo.create_form(params) + assert entity.id == "form-id" + assert entity.expiration_time == fixed_now + timedelta(hours=form_config.timeout) + # Console token should take precedence when console recipient is present. + assert entity.web_app_token == "token-console" + assert len(entity.recipients) == 3 + + +def test_submission_get_by_token_returns_none_when_missing_or_form_missing(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_session_factory(monkeypatch, _FakeSession(scalars_result=None)) + repo = HumanInputFormSubmissionRepository() + assert repo.get_by_token("tok") is None + + recipient = SimpleNamespace(form=None) + _patch_session_factory(monkeypatch, _FakeSession(scalars_result=recipient)) + repo = HumanInputFormSubmissionRepository() + assert repo.get_by_token("tok") is None + + +def test_submission_repository_init_no_args() -> None: + repo = HumanInputFormSubmissionRepository() + assert isinstance(repo, HumanInputFormSubmissionRepository) + + +def test_submission_get_by_token_and_get_by_form_id_success_paths(monkeypatch: pytest.MonkeyPatch) -> None: + form = _DummyForm( + id="f1", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + ) + recipient = SimpleNamespace( + id="r1", + form_id=form.id, + recipient_type=RecipientType.STANDALONE_WEB_APP, + access_token="tok", + form=form, + ) + + _patch_session_factory(monkeypatch, _FakeSession(scalars_result=recipient)) + repo = HumanInputFormSubmissionRepository() + record = repo.get_by_token("tok") + assert record is not None + assert record.access_token == "tok" + + _patch_session_factory(monkeypatch, _FakeSession(scalars_result=recipient)) + repo = HumanInputFormSubmissionRepository() + record = repo.get_by_form_id_and_recipient_type(form_id=form.id, recipient_type=RecipientType.STANDALONE_WEB_APP) + assert record is not None + assert record.recipient_id == "r1" + + +def test_submission_get_by_form_id_returns_none_on_missing(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_session_factory(monkeypatch, _FakeSession(scalars_result=None)) + repo = HumanInputFormSubmissionRepository() + assert repo.get_by_form_id_and_recipient_type(form_id="f", recipient_type=RecipientType.CONSOLE) is None + + +def test_mark_submitted_updates_and_raises_when_missing(monkeypatch: pytest.MonkeyPatch) -> None: + fixed_now = datetime(2024, 1, 1, 0, 0, 0) + monkeypatch.setattr("core.repositories.human_input_repository.naive_utc_now", lambda: fixed_now) + + missing_session = _FakeSession(forms={}) + _patch_session_factory(monkeypatch, missing_session) + repo = HumanInputFormSubmissionRepository() + with pytest.raises(FormNotFoundError, match="form not found"): + repo.mark_submitted( + form_id="missing", + recipient_id=None, + selected_action_id="a", + form_data={}, + submission_user_id=None, + submission_end_user_id=None, + ) + + form = _DummyForm( + id="f", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=fixed_now, + ) + recipient = _DummyRecipient(id="r", form_id=form.id, recipient_type=RecipientType.CONSOLE, access_token="tok") + session = _FakeSession(forms={form.id: form}, recipients={recipient.id: recipient}) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() + record = repo.mark_submitted( + form_id=form.id, + recipient_id=recipient.id, + selected_action_id="approve", + form_data={"k": "v"}, + submission_user_id="u", + submission_end_user_id="eu", + ) + assert form.status == HumanInputFormStatus.SUBMITTED + assert form.submitted_at == fixed_now + assert record.submitted_data == {"k": "v"} + + +def test_mark_timeout_invalid_status_raises(monkeypatch: pytest.MonkeyPatch) -> None: + form = _DummyForm( + id="f", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + ) + session = _FakeSession(forms={form.id: form}) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() + with pytest.raises(_InvalidTimeoutStatusError, match="invalid timeout status"): + repo.mark_timeout(form_id=form.id, timeout_status=HumanInputFormStatus.SUBMITTED) # type: ignore[arg-type] + + +def test_mark_timeout_already_timed_out_returns_record(monkeypatch: pytest.MonkeyPatch) -> None: + form = _DummyForm( + id="f", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + status=HumanInputFormStatus.TIMEOUT, + ) + session = _FakeSession(forms={form.id: form}) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() + record = repo.mark_timeout(form_id=form.id, timeout_status=HumanInputFormStatus.TIMEOUT, reason="r") + assert record.status == HumanInputFormStatus.TIMEOUT + + +def test_mark_timeout_submitted_raises_form_not_found(monkeypatch: pytest.MonkeyPatch) -> None: + form = _DummyForm( + id="f", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + status=HumanInputFormStatus.SUBMITTED, + ) + session = _FakeSession(forms={form.id: form}) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() + with pytest.raises(FormNotFoundError, match="form already submitted"): + repo.mark_timeout(form_id=form.id, timeout_status=HumanInputFormStatus.EXPIRED) + + +def test_mark_timeout_updates_fields(monkeypatch: pytest.MonkeyPatch) -> None: + form = _DummyForm( + id="f", + workflow_run_id=None, + node_id="node", + tenant_id="tenant", + app_id="app", + form_definition=_make_form_definition_json(include_expiration_time=True), + rendered_content="x
", + expiration_time=naive_utc_now(), + selected_action_id="a", + submitted_data="{}", + submission_user_id="u", + submission_end_user_id="eu", + completed_by_recipient_id="r", + status=HumanInputFormStatus.WAITING, + ) + session = _FakeSession(forms={form.id: form}) + _patch_session_factory(monkeypatch, session) + repo = HumanInputFormSubmissionRepository() + record = repo.mark_timeout(form_id=form.id, timeout_status=HumanInputFormStatus.EXPIRED) + assert form.status == HumanInputFormStatus.EXPIRED + assert form.selected_action_id is None + assert form.submitted_data is None + assert form.submission_user_id is None + assert form.submission_end_user_id is None + assert form.completed_by_recipient_id is None + assert record.status == HumanInputFormStatus.EXPIRED + + +def test_mark_timeout_raises_when_form_missing(monkeypatch: pytest.MonkeyPatch) -> None: + _patch_session_factory(monkeypatch, _FakeSession(forms={})) + repo = HumanInputFormSubmissionRepository() + with pytest.raises(FormNotFoundError, match="form not found"): + repo.mark_timeout(form_id="missing", timeout_status=HumanInputFormStatus.TIMEOUT) diff --git a/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py new file mode 100644 index 0000000000..232ab07882 --- /dev/null +++ b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_execution_repository.py @@ -0,0 +1,291 @@ +from datetime import UTC, datetime +from unittest.mock import MagicMock +from uuid import uuid4 + +import pytest +from sqlalchemy.engine import Engine +from sqlalchemy.orm import sessionmaker + +from core.repositories.sqlalchemy_workflow_execution_repository import SQLAlchemyWorkflowExecutionRepository +from dify_graph.entities.workflow_execution import WorkflowExecution, WorkflowExecutionStatus, WorkflowType +from models import Account, CreatorUserRole, EndUser, WorkflowRun +from models.enums import WorkflowRunTriggeredFrom + + +@pytest.fixture +def mock_session_factory(): + """Mock SQLAlchemy session factory.""" + session_factory = MagicMock(spec=sessionmaker) + session = MagicMock() + session.get.return_value = None + session_factory.return_value.__enter__.return_value = session + return session_factory + + +@pytest.fixture +def mock_engine(): + """Mock SQLAlchemy Engine.""" + return MagicMock(spec=Engine) + + +@pytest.fixture +def mock_account(): + """Mock Account user.""" + account = MagicMock(spec=Account) + account.id = str(uuid4()) + account.current_tenant_id = str(uuid4()) + return account + + +@pytest.fixture +def mock_end_user(): + """Mock EndUser.""" + user = MagicMock(spec=EndUser) + user.id = str(uuid4()) + user.tenant_id = str(uuid4()) + return user + + +@pytest.fixture +def sample_workflow_execution(): + """Sample WorkflowExecution for testing.""" + return WorkflowExecution( + id_=str(uuid4()), + workflow_id=str(uuid4()), + workflow_type=WorkflowType.WORKFLOW, + workflow_version="1.0", + graph={"nodes": [], "edges": []}, + inputs={"input1": "value1"}, + outputs={"output1": "result1"}, + status=WorkflowExecutionStatus.SUCCEEDED, + error_message="", + total_tokens=100, + total_steps=5, + exceptions_count=0, + started_at=datetime.now(UTC), + finished_at=datetime.now(UTC), + ) + + +class TestSQLAlchemyWorkflowExecutionRepository: + def test_init_with_sessionmaker(self, mock_session_factory, mock_account): + app_id = "test_app_id" + triggered_from = WorkflowRunTriggeredFrom.APP_RUN + + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, user=mock_account, app_id=app_id, triggered_from=triggered_from + ) + + assert repo._session_factory == mock_session_factory + assert repo._tenant_id == mock_account.current_tenant_id + assert repo._app_id == app_id + assert repo._triggered_from == triggered_from + assert repo._creator_user_id == mock_account.id + assert repo._creator_user_role == CreatorUserRole.ACCOUNT + + def test_init_with_engine(self, mock_engine, mock_account): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_engine, + user=mock_account, + app_id="test_app_id", + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + assert isinstance(repo._session_factory, sessionmaker) + assert repo._session_factory.kw["bind"] == mock_engine + + def test_init_invalid_session_factory(self, mock_account): + with pytest.raises(ValueError, match="Invalid session_factory type"): + SQLAlchemyWorkflowExecutionRepository( + session_factory="invalid", user=mock_account, app_id=None, triggered_from=None + ) + + def test_init_no_tenant_id(self, mock_session_factory): + user = MagicMock(spec=Account) + user.current_tenant_id = None + + with pytest.raises(ValueError, match="User must have a tenant_id"): + SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, user=user, app_id=None, triggered_from=None + ) + + def test_init_with_end_user(self, mock_session_factory, mock_end_user): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, user=mock_end_user, app_id=None, triggered_from=None + ) + assert repo._tenant_id == mock_end_user.tenant_id + assert repo._creator_user_role == CreatorUserRole.END_USER + + def test_to_domain_model(self, mock_session_factory, mock_account): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, user=mock_account, app_id=None, triggered_from=None + ) + + db_model = MagicMock(spec=WorkflowRun) + db_model.id = str(uuid4()) + db_model.workflow_id = str(uuid4()) + db_model.type = "workflow" + db_model.version = "1.0" + db_model.inputs_dict = {"in": "val"} + db_model.outputs_dict = {"out": "val"} + db_model.graph_dict = {"nodes": []} + db_model.status = "succeeded" + db_model.error = "some error" + db_model.total_tokens = 50 + db_model.total_steps = 3 + db_model.exceptions_count = 1 + db_model.created_at = datetime.now(UTC) + db_model.finished_at = datetime.now(UTC) + + domain_model = repo._to_domain_model(db_model) + + assert domain_model.id_ == db_model.id + assert domain_model.workflow_id == db_model.workflow_id + assert domain_model.status == WorkflowExecutionStatus.SUCCEEDED + assert domain_model.inputs == db_model.inputs_dict + assert domain_model.error_message == "some error" + + def test_to_db_model(self, mock_session_factory, mock_account, sample_workflow_execution): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id="test_app", + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + + # Make elapsed time deterministic to avoid flaky tests + sample_workflow_execution.started_at = datetime(2023, 1, 1, 0, 0, 0, tzinfo=UTC) + sample_workflow_execution.finished_at = datetime(2023, 1, 1, 0, 0, 10, tzinfo=UTC) + + db_model = repo._to_db_model(sample_workflow_execution) + + assert db_model.id == sample_workflow_execution.id_ + assert db_model.tenant_id == repo._tenant_id + assert db_model.app_id == "test_app" + assert db_model.triggered_from == WorkflowRunTriggeredFrom.DEBUGGING + assert db_model.status == sample_workflow_execution.status.value + assert db_model.total_tokens == sample_workflow_execution.total_tokens + assert db_model.elapsed_time == 10.0 + + def test_to_db_model_edge_cases(self, mock_session_factory, mock_account, sample_workflow_execution): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id="test_app", + triggered_from=WorkflowRunTriggeredFrom.DEBUGGING, + ) + # Test with empty/None fields + sample_workflow_execution.graph = None + sample_workflow_execution.inputs = None + sample_workflow_execution.outputs = None + sample_workflow_execution.error_message = None + sample_workflow_execution.finished_at = None + + db_model = repo._to_db_model(sample_workflow_execution) + + assert db_model.graph is None + assert db_model.inputs is None + assert db_model.outputs is None + assert db_model.error is None + assert db_model.elapsed_time == 0 + + def test_to_db_model_app_id_none(self, mock_session_factory, mock_account, sample_workflow_execution): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id=None, + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + db_model = repo._to_db_model(sample_workflow_execution) + assert not hasattr(db_model, "app_id") or db_model.app_id is None + assert db_model.tenant_id == repo._tenant_id + + def test_to_db_model_missing_context(self, mock_session_factory, mock_account, sample_workflow_execution): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, user=mock_account, app_id=None, triggered_from=None + ) + + # Test triggered_from missing + with pytest.raises(ValueError, match="triggered_from is required"): + repo._to_db_model(sample_workflow_execution) + + repo._triggered_from = WorkflowRunTriggeredFrom.APP_RUN + repo._creator_user_id = None + with pytest.raises(ValueError, match="created_by is required"): + repo._to_db_model(sample_workflow_execution) + + repo._creator_user_id = "some_id" + repo._creator_user_role = None + with pytest.raises(ValueError, match="created_by_role is required"): + repo._to_db_model(sample_workflow_execution) + + def test_save(self, mock_session_factory, mock_account, sample_workflow_execution): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id="test_app", + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + repo.save(sample_workflow_execution) + + session = mock_session_factory.return_value.__enter__.return_value + session.merge.assert_called_once() + session.commit.assert_called_once() + + # Check cache + assert sample_workflow_execution.id_ in repo._execution_cache + cached_model = repo._execution_cache[sample_workflow_execution.id_] + assert cached_model.id == sample_workflow_execution.id_ + + def test_save_uses_execution_started_at_when_record_does_not_exist( + self, mock_session_factory, mock_account, sample_workflow_execution + ): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id="test_app", + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + started_at = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC) + sample_workflow_execution.started_at = started_at + + session = mock_session_factory.return_value.__enter__.return_value + session.get.return_value = None + + repo.save(sample_workflow_execution) + + saved_model = session.merge.call_args.args[0] + assert saved_model.created_at == started_at + session.commit.assert_called_once() + + def test_save_preserves_existing_created_at_when_record_already_exists( + self, mock_session_factory, mock_account, sample_workflow_execution + ): + repo = SQLAlchemyWorkflowExecutionRepository( + session_factory=mock_session_factory, + user=mock_account, + app_id="test_app", + triggered_from=WorkflowRunTriggeredFrom.APP_RUN, + ) + + execution_id = sample_workflow_execution.id_ + existing_created_at = datetime(2026, 1, 1, 12, 0, 0, tzinfo=UTC) + + existing_run = WorkflowRun() + existing_run.id = execution_id + existing_run.tenant_id = repo._tenant_id + existing_run.created_at = existing_created_at + + session = mock_session_factory.return_value.__enter__.return_value + session.get.return_value = existing_run + + sample_workflow_execution.started_at = datetime(2026, 1, 1, 12, 30, 0, tzinfo=UTC) + + repo.save(sample_workflow_execution) + + saved_model = session.merge.call_args.args[0] + assert saved_model.created_at == existing_created_at + session.commit.assert_called_once() diff --git a/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_node_execution_repository.py b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_node_execution_repository.py new file mode 100644 index 0000000000..73de15e2cf --- /dev/null +++ b/api/tests/unit_tests/core/repositories/test_sqlalchemy_workflow_node_execution_repository.py @@ -0,0 +1,772 @@ +from __future__ import annotations + +import json +import logging +from collections.abc import Mapping +from datetime import UTC, datetime +from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, Mock + +import psycopg2.errors +import pytest +from sqlalchemy import Engine, create_engine +from sqlalchemy.exc import IntegrityError +from sqlalchemy.orm import sessionmaker + +from configs import dify_config +from core.repositories.sqlalchemy_workflow_node_execution_repository import ( + SQLAlchemyWorkflowNodeExecutionRepository, + _deterministic_json_dump, + _filter_by_offload_type, + _find_first, + _replace_or_append_offload, +) +from dify_graph.entities import WorkflowNodeExecution +from dify_graph.enums import ( + BuiltinNodeTypes, + WorkflowNodeExecutionMetadataKey, + WorkflowNodeExecutionStatus, +) +from dify_graph.repositories.workflow_node_execution_repository import OrderConfig +from models import Account, EndUser +from models.enums import ExecutionOffLoadType +from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload, WorkflowNodeExecutionTriggeredFrom + + +def _mock_account(*, tenant_id: str = "tenant", user_id: str = "user") -> Account: + user = Mock(spec=Account) + user.id = user_id + user.current_tenant_id = tenant_id + return user + + +def _mock_end_user(*, tenant_id: str = "tenant", user_id: str = "user") -> EndUser: + user = Mock(spec=EndUser) + user.id = user_id + user.tenant_id = tenant_id + return user + + +def _execution( + *, + execution_id: str = "exec-id", + node_execution_id: str = "node-exec-id", + workflow_run_id: str = "run-id", + status: WorkflowNodeExecutionStatus = WorkflowNodeExecutionStatus.SUCCEEDED, + inputs: Mapping[str, Any] | None = None, + outputs: Mapping[str, Any] | None = None, + process_data: Mapping[str, Any] | None = None, + metadata: Mapping[WorkflowNodeExecutionMetadataKey, Any] | None = None, +) -> WorkflowNodeExecution: + return WorkflowNodeExecution( + id=execution_id, + node_execution_id=node_execution_id, + workflow_id="workflow-id", + workflow_execution_id=workflow_run_id, + index=1, + predecessor_node_id=None, + node_id="node-id", + node_type=BuiltinNodeTypes.LLM, + title="Title", + inputs=inputs, + outputs=outputs, + process_data=process_data, + status=status, + error=None, + elapsed_time=1.0, + metadata=metadata, + created_at=datetime.now(UTC), + finished_at=None, + ) + + +class _SessionCtx: + def __init__(self, session: Any): + self._session = session + + def __enter__(self) -> Any: + return self._session + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + +def _session_factory(session: Any) -> sessionmaker: + factory = Mock(spec=sessionmaker) + factory.return_value = _SessionCtx(session) + return factory + + +def test_init_accepts_engine_and_sessionmaker_and_sets_role(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + + engine: Engine = create_engine("sqlite:///:memory:") + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=engine, + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + assert isinstance(repo._session_factory, sessionmaker) + + sm = Mock(spec=sessionmaker) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=sm, + user=_mock_end_user(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.SINGLE_STEP, + ) + assert repo._creator_user_role.value == "end_user" + + +def test_init_rejects_invalid_session_factory_type(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + with pytest.raises(ValueError, match="Invalid session_factory type"): + SQLAlchemyWorkflowNodeExecutionRepository( # type: ignore[arg-type] + session_factory=object(), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + +def test_init_requires_tenant_id(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + user = _mock_account() + user.current_tenant_id = None + with pytest.raises(ValueError, match="User must have a tenant_id"): + SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=user, + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + +def test_create_truncator_uses_config(monkeypatch: pytest.MonkeyPatch) -> None: + created: dict[str, Any] = {} + + class FakeTruncator: + def __init__(self, *, max_size_bytes: int, array_element_limit: int, string_length_limit: int): + created.update( + { + "max_size_bytes": max_size_bytes, + "array_element_limit": array_element_limit, + "string_length_limit": string_length_limit, + } + ) + + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.VariableTruncator", + FakeTruncator, + ) + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + _ = repo._create_truncator() + assert created["max_size_bytes"] == dify_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE + + +def test_helpers_find_first_and_replace_or_append_and_filter() -> None: + assert _deterministic_json_dump({"b": 1, "a": 2}) == '{"a": 2, "b": 1}' + assert _find_first([], lambda _: True) is None + assert _find_first([1, 2, 3], lambda x: x > 1) == 2 + + off1 = WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.INPUTS) + off2 = WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.OUTPUTS) + assert _find_first([off1, off2], _filter_by_offload_type(ExecutionOffLoadType.OUTPUTS)) is off2 + + replaced = _replace_or_append_offload([off1, off2], WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.INPUTS)) + assert len(replaced) == 2 + assert [o.type_ for o in replaced] == [ExecutionOffLoadType.OUTPUTS, ExecutionOffLoadType.INPUTS] + + +def test_to_db_model_requires_constructor_context(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + execution = _execution(inputs={"b": 1, "a": 2}, metadata={WorkflowNodeExecutionMetadataKey.TOTAL_TOKENS: 1}) + + # Happy path: deterministic json dump should be sorted + db_model = repo._to_db_model(execution) + assert json.loads(db_model.inputs or "{}") == {"a": 2, "b": 1} + assert json.loads(db_model.execution_metadata or "{}")["total_tokens"] == 1 + + repo._triggered_from = None + with pytest.raises(ValueError, match="triggered_from is required"): + repo._to_db_model(execution) + + +def test_to_db_model_requires_creator_user_id_and_role(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + execution = _execution() + db_model = repo._to_db_model(execution) + assert db_model.app_id == "app" + + repo._creator_user_id = None + with pytest.raises(ValueError, match="created_by is required"): + repo._to_db_model(execution) + + repo._creator_user_id = "user" + repo._creator_user_role = None + with pytest.raises(ValueError, match="created_by_role is required"): + repo._to_db_model(execution) + + +def test_is_duplicate_key_error_and_regenerate_id( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + unique = Mock(spec=psycopg2.errors.UniqueViolation) + duplicate_error = IntegrityError("dup", params=None, orig=unique) + assert repo._is_duplicate_key_error(duplicate_error) is True + assert repo._is_duplicate_key_error(IntegrityError("other", params=None, orig=None)) is False + + execution = _execution(execution_id="old-id") + db_model = WorkflowNodeExecutionModel() + db_model.id = "old-id" + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.uuidv7", lambda: "new-id") + caplog.set_level(logging.WARNING) + repo._regenerate_id_on_duplicate(execution, db_model) + assert execution.id == "new-id" + assert db_model.id == "new-id" + assert any("Duplicate key conflict" in r.message for r in caplog.records) + + +def test_persist_to_database_updates_existing_and_inserts_new(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + session = MagicMock() + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + db_model = WorkflowNodeExecutionModel() + db_model.id = "id1" + db_model.node_execution_id = "node1" + db_model.foo = "bar" # type: ignore[attr-defined] + db_model.__dict__["_private"] = "x" + + existing = SimpleNamespace() + session.get.return_value = existing + repo._persist_to_database(db_model) + assert existing.foo == "bar" + session.add.assert_not_called() + assert repo._node_execution_cache["node1"] is db_model + + session.reset_mock() + session.get.return_value = None + repo._node_execution_cache.clear() + repo._persist_to_database(db_model) + session.add.assert_called_once_with(db_model) + assert repo._node_execution_cache["node1"] is db_model + + +def test_truncate_and_upload_returns_none_when_no_values_or_not_truncated(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + assert repo._truncate_and_upload(None, "e", ExecutionOffLoadType.INPUTS) is None + + class FakeTruncator: + def truncate_variable_mapping(self, value: Any): # type: ignore[no-untyped-def] + return value, False + + monkeypatch.setattr(repo, "_create_truncator", lambda: FakeTruncator()) + assert repo._truncate_and_upload({"a": 1}, "e", ExecutionOffLoadType.INPUTS) is None + + +def test_truncate_and_upload_uploads_and_builds_offload(monkeypatch: pytest.MonkeyPatch) -> None: + uploaded: dict[str, Any] = {} + + class FakeFileService: + def upload_file(self, *, filename: str, content: bytes, mimetype: str, user: Any): # type: ignore[no-untyped-def] + uploaded.update({"filename": filename, "content": content, "mimetype": mimetype, "user": user}) + return SimpleNamespace(id="file-id", key="file-key") + + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", lambda *_: FakeFileService() + ) + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.uuidv7", lambda: "offload-id") + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + class FakeTruncator: + def truncate_variable_mapping(self, value: Any): # type: ignore[no-untyped-def] + return {"truncated": True}, True + + monkeypatch.setattr(repo, "_create_truncator", lambda: FakeTruncator()) + + result = repo._truncate_and_upload({"a": 1}, "exec", ExecutionOffLoadType.INPUTS) + assert result is not None + assert result.truncated_value == {"truncated": True} + assert uploaded["filename"].startswith("node_execution_exec_inputs.json") + assert result.offload.file_id == "file-id" + assert result.offload.type_ == ExecutionOffLoadType.INPUTS + + +def test_to_domain_model_loads_offloaded_files(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + db_model = WorkflowNodeExecutionModel() + db_model.id = "id" + db_model.node_execution_id = "node-exec" + db_model.workflow_id = "wf" + db_model.workflow_run_id = "run" + db_model.index = 1 + db_model.predecessor_node_id = None + db_model.node_id = "node" + db_model.node_type = BuiltinNodeTypes.LLM + db_model.title = "t" + db_model.inputs = json.dumps({"trunc": "i"}) + db_model.process_data = json.dumps({"trunc": "p"}) + db_model.outputs = json.dumps({"trunc": "o"}) + db_model.status = WorkflowNodeExecutionStatus.SUCCEEDED + db_model.error = None + db_model.elapsed_time = 0.1 + db_model.execution_metadata = json.dumps({"total_tokens": 3}) + db_model.created_at = datetime.now(UTC) + db_model.finished_at = None + + off_in = WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.INPUTS) + off_out = WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.OUTPUTS) + off_proc = WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.PROCESS_DATA) + off_in.file = SimpleNamespace(key="k-in") + off_out.file = SimpleNamespace(key="k-out") + off_proc.file = SimpleNamespace(key="k-proc") + db_model.offload_data = [off_out, off_in, off_proc] + + def fake_load(key: str) -> bytes: + return json.dumps({"full": key}).encode() + + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.storage.load", fake_load) + + domain = repo._to_domain_model(db_model) + assert domain.inputs == {"full": "k-in"} + assert domain.outputs == {"full": "k-out"} + assert domain.process_data == {"full": "k-proc"} + assert domain.get_truncated_inputs() == {"trunc": "i"} + assert domain.get_truncated_outputs() == {"trunc": "o"} + assert domain.get_truncated_process_data() == {"trunc": "p"} + + +def test_to_domain_model_returns_early_when_no_offload_data(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + db_model = WorkflowNodeExecutionModel() + db_model.id = "id" + db_model.node_execution_id = "node-exec" + db_model.workflow_id = "wf" + db_model.workflow_run_id = "run" + db_model.index = 1 + db_model.predecessor_node_id = None + db_model.node_id = "node" + db_model.node_type = BuiltinNodeTypes.LLM + db_model.title = "t" + db_model.inputs = json.dumps({"i": 1}) + db_model.process_data = json.dumps({"p": 2}) + db_model.outputs = json.dumps({"o": 3}) + db_model.status = WorkflowNodeExecutionStatus.SUCCEEDED + db_model.error = None + db_model.elapsed_time = 0.1 + db_model.execution_metadata = "{}" + db_model.created_at = datetime.now(UTC) + db_model.finished_at = None + db_model.offload_data = [] + + domain = repo._to_domain_model(db_model) + assert domain.inputs == {"i": 1} + assert domain.outputs == {"o": 3} + + +def test_json_encode_uses_runtime_converter(monkeypatch: pytest.MonkeyPatch) -> None: + class FakeConverter: + def to_json_encodable(self, values: Mapping[str, Any]) -> Mapping[str, Any]: + return {"wrapped": values["a"]} + + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.WorkflowRuntimeTypeConverter", + FakeConverter, + ) + assert SQLAlchemyWorkflowNodeExecutionRepository._json_encode({"a": 1}) == '{"wrapped": 1}' + + +def test_save_execution_data_handles_existing_db_model_and_truncation(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + session = MagicMock() + session.execute.return_value.scalars.return_value.first.return_value = SimpleNamespace( + id="id", + offload_data=[WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.INPUTS)], + inputs=None, + outputs=None, + process_data=None, + ) + session.merge = Mock() + session.flush = Mock() + session.begin.return_value.__enter__ = Mock(return_value=session) + session.begin.return_value.__exit__ = Mock(return_value=None) + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + execution = _execution(inputs={"a": 1}, outputs={"b": 2}, process_data={"c": 3}) + + trunc_result = SimpleNamespace( + truncated_value={"trunc": True}, + offload=WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.INPUTS, file_id="f1"), + ) + monkeypatch.setattr( + repo, "_truncate_and_upload", lambda values, *_args, **_kwargs: trunc_result if values == {"a": 1} else None + ) + monkeypatch.setattr(repo, "_json_encode", lambda values: json.dumps(values, sort_keys=True)) + + repo.save_execution_data(execution) + # Inputs should be truncated, outputs/process_data encoded directly + db_model = session.merge.call_args.args[0] + assert json.loads(db_model.inputs) == {"trunc": True} + assert json.loads(db_model.outputs) == {"b": 2} + assert json.loads(db_model.process_data) == {"c": 3} + assert any(off.type_ == ExecutionOffLoadType.INPUTS for off in db_model.offload_data) + assert execution.get_truncated_inputs() == {"trunc": True} + + +def test_save_execution_data_truncates_outputs_and_process_data(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + existing = SimpleNamespace( + id="id", + offload_data=[], + inputs=None, + outputs=None, + process_data=None, + ) + session = MagicMock() + session.execute.return_value.scalars.return_value.first.return_value = existing + session.merge = Mock() + session.flush = Mock() + session.begin.return_value.__enter__ = Mock(return_value=session) + session.begin.return_value.__exit__ = Mock(return_value=None) + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + execution = _execution(inputs={"a": 1}, outputs={"b": 2}, process_data={"c": 3}) + + def trunc(values: Mapping[str, Any], *_args: Any, **_kwargs: Any) -> Any: + if values == {"b": 2}: + return SimpleNamespace( + truncated_value={"b": "trunc"}, + offload=WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.OUTPUTS, file_id="f2"), + ) + if values == {"c": 3}: + return SimpleNamespace( + truncated_value={"c": "trunc"}, + offload=WorkflowNodeExecutionOffload(type_=ExecutionOffLoadType.PROCESS_DATA, file_id="f3"), + ) + return None + + monkeypatch.setattr(repo, "_truncate_and_upload", trunc) + monkeypatch.setattr(repo, "_json_encode", lambda values: json.dumps(values, sort_keys=True)) + + repo.save_execution_data(execution) + db_model = session.merge.call_args.args[0] + assert json.loads(db_model.outputs) == {"b": "trunc"} + assert json.loads(db_model.process_data) == {"c": "trunc"} + assert execution.get_truncated_outputs() == {"b": "trunc"} + assert execution.get_truncated_process_data() == {"c": "trunc"} + + +def test_save_execution_data_handles_missing_db_model(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + session = MagicMock() + session.execute.return_value.scalars.return_value.first.return_value = None + session.merge = Mock() + session.flush = Mock() + session.begin.return_value.__enter__ = Mock(return_value=session) + session.begin.return_value.__exit__ = Mock(return_value=None) + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + execution = _execution(inputs={"a": 1}) + fake_db_model = SimpleNamespace(id=execution.id, offload_data=[], inputs=None, outputs=None, process_data=None) + monkeypatch.setattr(repo, "_to_db_model", lambda *_: fake_db_model) + monkeypatch.setattr(repo, "_truncate_and_upload", lambda *_args, **_kwargs: None) + monkeypatch.setattr(repo, "_json_encode", lambda values: json.dumps(values)) + + repo.save_execution_data(execution) + merged = session.merge.call_args.args[0] + assert merged.inputs == '{"a": 1}' + + +def test_save_retries_duplicate_and_logs_non_duplicate( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + execution = _execution(execution_id="id") + unique = Mock(spec=psycopg2.errors.UniqueViolation) + duplicate_error = IntegrityError("dup", params=None, orig=unique) + other_error = IntegrityError("other", params=None, orig=None) + + calls = {"n": 0} + + def persist(_db_model: Any) -> None: + calls["n"] += 1 + if calls["n"] == 1: + raise duplicate_error + + monkeypatch.setattr(repo, "_persist_to_database", persist) + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.uuidv7", lambda: "new-id") + repo.save(execution) + assert execution.id == "new-id" + assert repo._node_execution_cache[execution.node_execution_id] is not None + + caplog.set_level(logging.ERROR) + monkeypatch.setattr(repo, "_persist_to_database", lambda _db: (_ for _ in ()).throw(other_error)) + with pytest.raises(IntegrityError): + repo.save(_execution(execution_id="id2", node_execution_id="node2")) + assert any("Non-duplicate key integrity error" in r.message for r in caplog.records) + + +def test_save_logs_and_reraises_on_unexpected_error( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + caplog.set_level(logging.ERROR) + monkeypatch.setattr(repo, "_persist_to_database", lambda _db: (_ for _ in ()).throw(RuntimeError("boom"))) + with pytest.raises(RuntimeError, match="boom"): + repo.save(_execution(execution_id="id3", node_execution_id="node3")) + assert any("Failed to save workflow node execution" in r.message for r in caplog.records) + + +def test_get_db_models_by_workflow_run_orders_and_caches(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + + class FakeStmt: + def __init__(self) -> None: + self.where_calls = 0 + self.order_by_args: tuple[Any, ...] | None = None + + def where(self, *_args: Any) -> FakeStmt: + self.where_calls += 1 + return self + + def order_by(self, *args: Any) -> FakeStmt: + self.order_by_args = args + return self + + stmt = FakeStmt() + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.WorkflowNodeExecutionModel.preload_offload_data_and_files", + lambda _q: stmt, + ) + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.select", lambda *_: "select") + + model1 = SimpleNamespace(node_execution_id="n1") + model2 = SimpleNamespace(node_execution_id=None) + session = MagicMock() + session.scalars.return_value.all.return_value = [model1, model2] + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id="app", + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + order = OrderConfig(order_by=["index", "missing"], order_direction="desc") + db_models = repo.get_db_models_by_workflow_run("run", order) + assert db_models == [model1, model2] + assert repo._node_execution_cache["n1"] is model1 + assert stmt.order_by_args is not None + + +def test_get_db_models_by_workflow_run_uses_asc_order(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + + class FakeStmt: + def where(self, *_args: Any) -> FakeStmt: + return self + + def order_by(self, *args: Any) -> FakeStmt: + self.args = args # type: ignore[attr-defined] + return self + + stmt = FakeStmt() + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.WorkflowNodeExecutionModel.preload_offload_data_and_files", + lambda _q: stmt, + ) + monkeypatch.setattr("core.repositories.sqlalchemy_workflow_node_execution_repository.select", lambda *_: "select") + + session = MagicMock() + session.scalars.return_value.all.return_value = [] + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=_session_factory(session), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + repo.get_db_models_by_workflow_run("run", OrderConfig(order_by=["index"], order_direction="asc")) + + +def test_get_by_workflow_run_maps_to_domain(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.FileService", + lambda *_: SimpleNamespace(upload_file=Mock()), + ) + + repo = SQLAlchemyWorkflowNodeExecutionRepository( + session_factory=Mock(spec=sessionmaker), + user=_mock_account(), + app_id=None, + triggered_from=WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN, + ) + + db_models = [SimpleNamespace(id="db1"), SimpleNamespace(id="db2")] + monkeypatch.setattr(repo, "get_db_models_by_workflow_run", lambda *_args, **_kwargs: db_models) + monkeypatch.setattr(repo, "_to_domain_model", lambda m: f"domain:{m.id}") + + class FakeExecutor: + def __enter__(self) -> FakeExecutor: + return self + + def __exit__(self, exc_type, exc, tb) -> None: + return None + + def map(self, func, items, timeout: int): # type: ignore[no-untyped-def] + assert timeout == 30 + return list(map(func, items)) + + monkeypatch.setattr( + "core.repositories.sqlalchemy_workflow_node_execution_repository.ThreadPoolExecutor", + lambda max_workers: FakeExecutor(), + ) + + result = repo.get_by_workflow_run("run", order_config=None) + assert result == ["domain:db1", "domain:db2"] diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py index 07f28f162a..456c3dde12 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_conflict_handling.py @@ -10,11 +10,11 @@ from sqlalchemy.orm import sessionmaker from core.repositories.sqlalchemy_workflow_node_execution_repository import ( SQLAlchemyWorkflowNodeExecutionRepository, ) -from core.workflow.entities.workflow_node_execution import ( +from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from core.workflow.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from libs.datetime_utils import naive_utc_now from models import Account, WorkflowNodeExecutionTriggeredFrom @@ -70,7 +70,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, @@ -108,7 +108,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.SUCCEEDED, @@ -153,7 +153,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, @@ -195,7 +195,7 @@ class TestWorkflowNodeExecutionConflictHandling: workflow_execution_id="test-workflow-execution-id", node_execution_id="test-node-execution-id", node_id="test-node-id", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, title="Test Node", index=1, status=WorkflowNodeExecutionStatus.RUNNING, diff --git a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py index 485be90eae..eeab81a178 100644 --- a/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py +++ b/api/tests/unit_tests/core/repositories/test_workflow_node_execution_truncation.py @@ -13,14 +13,15 @@ from unittest.mock import MagicMock from sqlalchemy import Engine +from configs import dify_config from core.repositories.sqlalchemy_workflow_node_execution_repository import ( SQLAlchemyWorkflowNodeExecutionRepository, ) -from core.workflow.entities.workflow_node_execution import ( +from dify_graph.entities.workflow_node_execution import ( WorkflowNodeExecution, WorkflowNodeExecutionStatus, ) -from core.workflow.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes from models import Account, WorkflowNodeExecutionTriggeredFrom from models.enums import ExecutionOffLoadType from models.workflow import WorkflowNodeExecutionModel, WorkflowNodeExecutionOffload @@ -41,7 +42,7 @@ class TruncationTestCase: def create_test_cases() -> list[TruncationTestCase]: """Create test cases for different truncation scenarios.""" # Create large data that will definitely exceed the threshold (10KB) - large_data = {"data": "x" * (TRUNCATION_SIZE_THRESHOLD + 1000)} + large_data = {"data": "x" * (dify_config.WORKFLOW_VARIABLE_TRUNCATION_MAX_SIZE + 1000)} small_data = {"data": "small"} return [ @@ -101,7 +102,7 @@ def create_workflow_node_execution( workflow_execution_id="test-workflow-execution-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", inputs=inputs, outputs=outputs, @@ -145,7 +146,7 @@ class TestSQLAlchemyWorkflowNodeExecutionRepositoryTruncation: db_model.index = 1 db_model.predecessor_node_id = None db_model.node_id = "node-id" - db_model.node_type = NodeType.LLM + db_model.node_type = BuiltinNodeTypes.LLM db_model.title = "Test Node" db_model.inputs = json.dumps({"value": "inputs"}) db_model.process_data = json.dumps({"value": "process_data"}) diff --git a/api/tests/unit_tests/core/schemas/test_registry.py b/api/tests/unit_tests/core/schemas/test_registry.py new file mode 100644 index 0000000000..5749e72eb0 --- /dev/null +++ b/api/tests/unit_tests/core/schemas/test_registry.py @@ -0,0 +1,137 @@ +import json +from unittest.mock import patch + +from core.schemas.registry import SchemaRegistry + + +class TestSchemaRegistry: + def test_initialization(self, tmp_path): + base_dir = tmp_path / "schemas" + base_dir.mkdir() + registry = SchemaRegistry(str(base_dir)) + assert registry.base_dir == base_dir + assert registry.versions == {} + assert registry.metadata == {} + + def test_default_registry_singleton(self): + registry1 = SchemaRegistry.default_registry() + registry2 = SchemaRegistry.default_registry() + assert registry1 is registry2 + assert isinstance(registry1, SchemaRegistry) + + def test_load_all_versions_non_existent_dir(self, tmp_path): + base_dir = tmp_path / "non_existent" + registry = SchemaRegistry(str(base_dir)) + registry.load_all_versions() + assert registry.versions == {} + + def test_load_all_versions_filtering(self, tmp_path): + base_dir = tmp_path / "schemas" + base_dir.mkdir() + (base_dir / "not_a_version_dir").mkdir() + (base_dir / "v1").mkdir() + (base_dir / "some_file.txt").write_text("content") + + registry = SchemaRegistry(str(base_dir)) + with patch.object(registry, "_load_version_dir") as mock_load: + registry.load_all_versions() + mock_load.assert_called_once() + assert mock_load.call_args[0][0] == "v1" + + def test_load_version_dir_filtering(self, tmp_path): + version_dir = tmp_path / "v1" + version_dir.mkdir() + (version_dir / "schema1.json").write_text("{}") + (version_dir / "not_a_schema.txt").write_text("content") + + registry = SchemaRegistry(str(tmp_path)) + with patch.object(registry, "_load_schema") as mock_load: + registry._load_version_dir("v1", version_dir) + mock_load.assert_called_once() + assert mock_load.call_args[0][1] == "schema1" + + def test_load_version_dir_non_existent(self, tmp_path): + version_dir = tmp_path / "non_existent" + registry = SchemaRegistry(str(tmp_path)) + registry._load_version_dir("v1", version_dir) + assert "v1" not in registry.versions + + def test_load_schema_success(self, tmp_path): + schema_path = tmp_path / "test.json" + schema_content = {"title": "Test Schema", "description": "A test schema"} + schema_path.write_text(json.dumps(schema_content)) + + registry = SchemaRegistry(str(tmp_path)) + registry.versions["v1"] = {} + registry._load_schema("v1", "test", schema_path) + + assert registry.versions["v1"]["test"] == schema_content + uri = "https://dify.ai/schemas/v1/test.json" + assert registry.metadata[uri]["title"] == "Test Schema" + assert registry.metadata[uri]["version"] == "v1" + + def test_load_schema_invalid_json(self, tmp_path, caplog): + schema_path = tmp_path / "invalid.json" + schema_path.write_text("invalid json") + + registry = SchemaRegistry(str(tmp_path)) + registry.versions["v1"] = {} + registry._load_schema("v1", "invalid", schema_path) + + assert "Failed to load schema v1/invalid" in caplog.text + + def test_load_schema_os_error(self, tmp_path, caplog): + schema_path = tmp_path / "error.json" + schema_path.write_text("{}") + + registry = SchemaRegistry(str(tmp_path)) + registry.versions["v1"] = {} + + with patch("builtins.open", side_effect=OSError("Read error")): + registry._load_schema("v1", "error", schema_path) + + assert "Failed to load schema v1/error" in caplog.text + + def test_get_schema(self): + registry = SchemaRegistry("/tmp") + registry.versions = {"v1": {"test": {"type": "object"}}} + + # Valid URI + assert registry.get_schema("https://dify.ai/schemas/v1/test.json") == {"type": "object"} + + # Invalid URI + assert registry.get_schema("invalid-uri") is None + + # Missing version + assert registry.get_schema("https://dify.ai/schemas/v2/test.json") is None + + def test_list_versions(self): + registry = SchemaRegistry("/tmp") + registry.versions = {"v2": {}, "v1": {}} + assert registry.list_versions() == ["v1", "v2"] + + def test_list_schemas(self): + registry = SchemaRegistry("/tmp") + registry.versions = {"v1": {"b": {}, "a": {}}} + + assert registry.list_schemas("v1") == ["a", "b"] + assert registry.list_schemas("v2") == [] + + def test_get_all_schemas_for_version(self): + registry = SchemaRegistry("/tmp") + registry.versions = {"v1": {"test": {"title": "Test Label"}}} + + results = registry.get_all_schemas_for_version("v1") + assert len(results) == 1 + assert results[0]["name"] == "test" + assert results[0]["label"] == "Test Label" + assert results[0]["schema"] == {"title": "Test Label"} + + # Default label if title missing + registry.versions["v1"]["no_title"] = {} + results = registry.get_all_schemas_for_version("v1") + item = next(r for r in results if r["name"] == "no_title") + assert item["label"] == "no_title" + + # Empty if version missing + assert registry.get_all_schemas_for_version("v2") == [] diff --git a/api/tests/unit_tests/core/schemas/test_schema_manager.py b/api/tests/unit_tests/core/schemas/test_schema_manager.py new file mode 100644 index 0000000000..cb07340c6d --- /dev/null +++ b/api/tests/unit_tests/core/schemas/test_schema_manager.py @@ -0,0 +1,80 @@ +from unittest.mock import MagicMock, patch + +from core.schemas.registry import SchemaRegistry +from core.schemas.schema_manager import SchemaManager + + +def test_init_with_provided_registry(): + mock_registry = MagicMock(spec=SchemaRegistry) + manager = SchemaManager(registry=mock_registry) + assert manager.registry == mock_registry + + +@patch("core.schemas.schema_manager.SchemaRegistry.default_registry") +def test_init_with_default_registry(mock_default_registry): + mock_registry = MagicMock(spec=SchemaRegistry) + mock_default_registry.return_value = mock_registry + + manager = SchemaManager() + + mock_default_registry.assert_called_once() + assert manager.registry == mock_registry + + +def test_get_all_schema_definitions(): + mock_registry = MagicMock(spec=SchemaRegistry) + expected_definitions = [{"name": "schema1", "schema": {}}, {"name": "schema2", "schema": {}}] + mock_registry.get_all_schemas_for_version.return_value = expected_definitions + + manager = SchemaManager(registry=mock_registry) + result = manager.get_all_schema_definitions(version="v2") + + mock_registry.get_all_schemas_for_version.assert_called_once_with("v2") + assert result == expected_definitions + + +def test_get_schema_by_name_success(): + mock_registry = MagicMock(spec=SchemaRegistry) + mock_schema = {"type": "object"} + mock_registry.get_schema.return_value = mock_schema + + manager = SchemaManager(registry=mock_registry) + result = manager.get_schema_by_name("my_schema", version="v1") + + expected_uri = "https://dify.ai/schemas/v1/my_schema.json" + mock_registry.get_schema.assert_called_once_with(expected_uri) + assert result == {"name": "my_schema", "schema": mock_schema} + + +def test_get_schema_by_name_not_found(): + mock_registry = MagicMock(spec=SchemaRegistry) + mock_registry.get_schema.return_value = None + + manager = SchemaManager(registry=mock_registry) + result = manager.get_schema_by_name("non_existent", version="v1") + + assert result is None + + +def test_list_available_schemas(): + mock_registry = MagicMock(spec=SchemaRegistry) + expected_schemas = ["schema1", "schema2"] + mock_registry.list_schemas.return_value = expected_schemas + + manager = SchemaManager(registry=mock_registry) + result = manager.list_available_schemas(version="v1") + + mock_registry.list_schemas.assert_called_once_with("v1") + assert result == expected_schemas + + +def test_list_available_versions(): + mock_registry = MagicMock(spec=SchemaRegistry) + expected_versions = ["v1", "v2"] + mock_registry.list_versions.return_value = expected_versions + + manager = SchemaManager(registry=mock_registry) + result = manager.list_available_versions() + + mock_registry.list_versions.assert_called_once() + assert result == expected_versions diff --git a/api/tests/unit_tests/core/test_file.py b/api/tests/unit_tests/core/test_file.py index b9c5fbd7d8..251d6fd25e 100644 --- a/api/tests/unit_tests/core/test_file.py +++ b/api/tests/unit_tests/core/test_file.py @@ -1,6 +1,6 @@ import json -from core.workflow.file import File, FileTransferMethod, FileType, FileUploadConfig +from dify_graph.file import File, FileTransferMethod, FileType, FileUploadConfig from models.workflow import Workflow diff --git a/api/tests/unit_tests/core/test_model_manager.py b/api/tests/unit_tests/core/test_model_manager.py index 5a7547e85c..92e4b58473 100644 --- a/api/tests/unit_tests/core/test_model_manager.py +++ b/api/tests/unit_tests/core/test_model_manager.py @@ -6,7 +6,7 @@ from pytest_mock import MockerFixture from core.entities.provider_entities import ModelLoadBalancingConfiguration from core.model_manager import LBModelManager -from core.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.entities.model_entities import ModelType from extensions.ext_redis import redis_client diff --git a/api/tests/unit_tests/core/test_provider_configuration.py b/api/tests/unit_tests/core/test_provider_configuration.py index 636fac7a40..90ed1647aa 100644 --- a/api/tests/unit_tests/core/test_provider_configuration.py +++ b/api/tests/unit_tests/core/test_provider_configuration.py @@ -12,9 +12,9 @@ from core.entities.provider_entities import ( RestrictModel, SystemConfiguration, ) -from core.model_runtime.entities.common_entities import I18nObject -from core.model_runtime.entities.model_entities import ModelType -from core.model_runtime.entities.provider_entities import ( +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ModelType +from dify_graph.model_runtime.entities.provider_entities import ( ConfigurateMethod, CredentialFormSchema, FormOption, diff --git a/api/tests/unit_tests/core/test_provider_manager.py b/api/tests/unit_tests/core/test_provider_manager.py index 3163d53b87..69567c54eb 100644 --- a/api/tests/unit_tests/core/test_provider_manager.py +++ b/api/tests/unit_tests/core/test_provider_manager.py @@ -1,32 +1,34 @@ +from unittest.mock import Mock, PropertyMock, patch + import pytest -from pytest_mock import MockerFixture from core.entities.provider_entities import ModelSettings -from core.model_runtime.entities.model_entities import ModelType from core.provider_manager import ProviderManager +from dify_graph.model_runtime.entities.common_entities import I18nObject +from dify_graph.model_runtime.entities.model_entities import ModelType from models.provider import LoadBalancingModelConfig, ProviderModelSetting @pytest.fixture -def mock_provider_entity(mocker: MockerFixture): - mock_entity = mocker.Mock() +def mock_provider_entity(): + mock_entity = Mock() mock_entity.provider = "openai" mock_entity.configurate_methods = ["predefined-model"] mock_entity.supported_model_types = [ModelType.LLM] # Use PropertyMock to ensure credential_form_schemas is iterable - provider_credential_schema = mocker.Mock() - type(provider_credential_schema).credential_form_schemas = mocker.PropertyMock(return_value=[]) + provider_credential_schema = Mock() + type(provider_credential_schema).credential_form_schemas = PropertyMock(return_value=[]) mock_entity.provider_credential_schema = provider_credential_schema - model_credential_schema = mocker.Mock() - type(model_credential_schema).credential_form_schemas = mocker.PropertyMock(return_value=[]) + model_credential_schema = Mock() + type(model_credential_schema).credential_form_schemas = PropertyMock(return_value=[]) mock_entity.model_credential_schema = model_credential_schema return mock_entity -def test__to_model_settings(mocker: MockerFixture, mock_provider_entity): +def test__to_model_settings(mock_provider_entity): # Mocking the inputs ps = ProviderModelSetting( tenant_id="tenant_id", @@ -63,18 +65,18 @@ def test__to_model_settings(mocker: MockerFixture, mock_provider_entity): load_balancing_model_configs[0].id = "id1" load_balancing_model_configs[1].id = "id2" - mocker.patch( - "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"} - ) + with patch( + "core.helper.model_provider_cache.ProviderCredentialsCache.get", + return_value={"openai_api_key": "fake_key"}, + ): + provider_manager = ProviderManager() - provider_manager = ProviderManager() - - # Running the method - result = provider_manager._to_model_settings( - provider_entity=mock_provider_entity, - provider_model_settings=provider_model_settings, - load_balancing_model_configs=load_balancing_model_configs, - ) + # Running the method + result = provider_manager._to_model_settings( + provider_entity=mock_provider_entity, + provider_model_settings=provider_model_settings, + load_balancing_model_configs=load_balancing_model_configs, + ) # Asserting that the result is as expected assert len(result) == 1 @@ -87,7 +89,7 @@ def test__to_model_settings(mocker: MockerFixture, mock_provider_entity): assert result[0].load_balancing_configs[1].name == "first" -def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_entity): +def test__to_model_settings_only_one_lb(mock_provider_entity): # Mocking the inputs ps = ProviderModelSetting( @@ -113,18 +115,18 @@ def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_ent ] load_balancing_model_configs[0].id = "id1" - mocker.patch( - "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"} - ) + with patch( + "core.helper.model_provider_cache.ProviderCredentialsCache.get", + return_value={"openai_api_key": "fake_key"}, + ): + provider_manager = ProviderManager() - provider_manager = ProviderManager() - - # Running the method - result = provider_manager._to_model_settings( - provider_entity=mock_provider_entity, - provider_model_settings=provider_model_settings, - load_balancing_model_configs=load_balancing_model_configs, - ) + # Running the method + result = provider_manager._to_model_settings( + provider_entity=mock_provider_entity, + provider_model_settings=provider_model_settings, + load_balancing_model_configs=load_balancing_model_configs, + ) # Asserting that the result is as expected assert len(result) == 1 @@ -135,7 +137,7 @@ def test__to_model_settings_only_one_lb(mocker: MockerFixture, mock_provider_ent assert len(result[0].load_balancing_configs) == 0 -def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_entity): +def test__to_model_settings_lb_disabled(mock_provider_entity): # Mocking the inputs ps = ProviderModelSetting( tenant_id="tenant_id", @@ -170,18 +172,18 @@ def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_ent load_balancing_model_configs[0].id = "id1" load_balancing_model_configs[1].id = "id2" - mocker.patch( - "core.helper.model_provider_cache.ProviderCredentialsCache.get", return_value={"openai_api_key": "fake_key"} - ) + with patch( + "core.helper.model_provider_cache.ProviderCredentialsCache.get", + return_value={"openai_api_key": "fake_key"}, + ): + provider_manager = ProviderManager() - provider_manager = ProviderManager() - - # Running the method - result = provider_manager._to_model_settings( - provider_entity=mock_provider_entity, - provider_model_settings=provider_model_settings, - load_balancing_model_configs=load_balancing_model_configs, - ) + # Running the method + result = provider_manager._to_model_settings( + provider_entity=mock_provider_entity, + provider_model_settings=provider_model_settings, + load_balancing_model_configs=load_balancing_model_configs, + ) # Asserting that the result is as expected assert len(result) == 1 @@ -190,3 +192,39 @@ def test__to_model_settings_lb_disabled(mocker: MockerFixture, mock_provider_ent assert result[0].model_type == ModelType.LLM assert result[0].enabled is True assert len(result[0].load_balancing_configs) == 0 + + +def test_get_default_model_uses_first_available_active_model(): + mock_session = Mock() + mock_session.scalar.return_value = None + + provider_configurations = Mock() + provider_configurations.get_models.return_value = [ + Mock(model="gpt-3.5-turbo", provider=Mock(provider="openai")), + Mock(model="gpt-4", provider=Mock(provider="openai")), + ] + + manager = ProviderManager() + with ( + patch("core.provider_manager.db.session", mock_session), + patch.object(manager, "get_configurations", return_value=provider_configurations), + patch("core.provider_manager.ModelProviderFactory") as mock_factory_cls, + ): + mock_factory_cls.return_value.get_provider_schema.return_value = Mock( + provider="openai", + label=I18nObject(en_US="OpenAI", zh_Hans="OpenAI"), + icon_small=I18nObject(en_US="icon_small.png", zh_Hans="icon_small.png"), + supported_model_types=[ModelType.LLM], + ) + + result = manager.get_default_model("tenant-id", ModelType.LLM) + + assert result is not None + assert result.model == "gpt-3.5-turbo" + assert result.provider.provider == "openai" + provider_configurations.get_models.assert_called_once_with(model_type=ModelType.LLM, only_active=True) + mock_session.add.assert_called_once() + saved_default_model = mock_session.add.call_args.args[0] + assert saved_default_model.model_name == "gpt-3.5-turbo" + assert saved_default_model.provider_name == "openai" + mock_session.commit.assert_called_once() diff --git a/api/tests/unit_tests/core/tools/__init__.py b/api/tests/unit_tests/core/tools/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/tools/test_builtin_tool_base.py b/api/tests/unit_tests/core/tools/test_builtin_tool_base.py new file mode 100644 index 0000000000..f123f60a34 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tool_base.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from collections.abc import Generator +from types import SimpleNamespace +from typing import Any +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage, ToolProviderType +from dify_graph.model_runtime.entities.message_entities import UserPromptMessage + + +class _BuiltinDummyTool(BuiltinTool): + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +def _build_tool() -> _BuiltinDummyTool: + entity = ToolEntity( + identity=ToolIdentity(author="author", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=[], + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + return _BuiltinDummyTool(provider="provider-a", entity=entity, runtime=runtime) + + +def test_builtin_tool_fork_and_provider_type(): + tool = _build_tool() + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, _BuiltinDummyTool) + assert forked.runtime.tenant_id == "tenant-2" + assert tool.tool_provider_type() == ToolProviderType.BUILT_IN + + +def test_invoke_model_calls_model_invocation_utils_invoke(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.invoke", return_value="result") as mock_invoke: + assert ( + tool.invoke_model( + user_id="u1", + prompt_messages=[UserPromptMessage(content="hello")], + stop=[], + ) + == "result" + ) + mock_invoke.assert_called_once() + + +def test_get_max_tokens_returns_value(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.get_max_llm_context_tokens", return_value=4096): + assert tool.get_max_tokens() == 4096 + + +def test_get_prompt_tokens_returns_value(): + tool = _build_tool() + with patch("core.tools.builtin_tool.tool.ModelInvocationUtils.calculate_tokens", return_value=7): + assert tool.get_prompt_tokens([UserPromptMessage(content="hello")]) == 7 + + +def test_runtime_none_raises(): + tool = _build_tool() + tool.runtime = None + with pytest.raises(ValueError, match="runtime is required"): + tool.get_max_tokens() + with pytest.raises(ValueError, match="runtime is required"): + tool.get_prompt_tokens([UserPromptMessage(content="hello")]) + + +def test_builtin_tool_summary_short_and_long_content_paths(): + tool = _build_tool() + + with patch.object(_BuiltinDummyTool, "get_max_tokens", return_value=100): + with patch.object(_BuiltinDummyTool, "get_prompt_tokens", return_value=10): + assert tool.summary(user_id="u1", content="short") == "short" + + with patch.object(_BuiltinDummyTool, "get_max_tokens", return_value=10): + with patch.object( + _BuiltinDummyTool, + "get_prompt_tokens", + side_effect=lambda prompt_messages: len(prompt_messages[-1].content), + ): + with patch.object( + _BuiltinDummyTool, + "invoke_model", + return_value=SimpleNamespace(message=SimpleNamespace(content="S")), + ): + result = tool.summary(user_id="u1", content="x" * 30 + "\n" + "y" * 5) + + assert result + assert "S" in result diff --git a/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py b/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py new file mode 100644 index 0000000000..ad6d5906ae --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tool_provider.py @@ -0,0 +1,216 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.plugin.entities.plugin_daemon import CredentialType +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.provider import BuiltinToolProviderController +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.tool_entities import ToolInvokeMessage, ToolProviderEntity, ToolProviderType +from core.tools.errors import ToolProviderNotFoundError + + +class _FakeBuiltinTool(BuiltinTool): + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +class _ConcreteBuiltinProvider(BuiltinToolProviderController): + last_validation: tuple[str, dict[str, Any]] | None = None + + def _validate_credentials(self, user_id: str, credentials: dict[str, Any]): + self.last_validation = (user_id, credentials) + + +def _provider_yaml() -> dict[str, Any]: + return { + "identity": { + "author": "Dify", + "name": "fake_provider", + "label": {"en_US": "Fake Provider"}, + "description": {"en_US": "Fake description"}, + "icon": "icon.svg", + "tags": ["utilities"], + }, + "credentials_for_provider": { + "api_key": { + "type": "secret-input", + "required": True, + } + }, + "oauth_schema": { + "client_schema": [ + { + "name": "client_id", + "type": "text-input", + } + ], + "credentials_schema": [ + { + "name": "access_token", + "type": "secret-input", + } + ], + }, + } + + +def _tool_yaml() -> dict[str, Any]: + return { + "identity": { + "author": "Dify", + "name": "tool_a", + "label": {"en_US": "Tool A"}, + }, + "parameters": [], + } + + +def test_builtin_tool_provider_init_load_tools_and_basic_accessors(monkeypatch): + yaml_payloads = [_provider_yaml(), _tool_yaml()] + + def _load_yaml(*args, **kwargs): + return yaml_payloads.pop(0) + + monkeypatch.setattr("core.tools.builtin_tool.provider.load_yaml_file_cached", _load_yaml) + monkeypatch.setattr( + "core.tools.builtin_tool.provider.listdir", + lambda *args, **kwargs: ["tool_a.yaml", "__init__.py", "readme.md"], + ) + monkeypatch.setattr( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + lambda *args, **kwargs: _FakeBuiltinTool, + ) + provider = _ConcreteBuiltinProvider() + + assert provider.get_credentials_schema() + assert provider.get_tools() + assert provider.get_tool("tool_a") is not None + assert provider.get_tool("missing") is None + assert provider.provider_type == ToolProviderType.BUILT_IN + assert provider.tool_labels == ["utilities"] + assert provider.need_credentials is True + + oauth_schema = provider.get_credentials_schema_by_type(CredentialType.OAUTH2) + assert len(oauth_schema) == 1 + api_schema = provider.get_credentials_schema_by_type(CredentialType.API_KEY) + assert len(api_schema) == 1 + assert provider.get_oauth_client_schema()[0].name == "client_id" + assert set(provider.get_supported_credential_types()) == {CredentialType.API_KEY, CredentialType.OAUTH2} + + +def test_builtin_tool_provider_invalid_credential_type_raises(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + with pytest.raises(ValueError, match="Invalid credential type: invalid"): + provider.get_credentials_schema_by_type("invalid") + + +def test_builtin_tool_provider_validate_credentials_delegates(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + provider.validate_credentials("user-1", {"api_key": "secret"}) + assert provider.last_validation == ("user-1", {"api_key": "secret"}) + + +def test_builtin_tool_provider_unauthorized_schema_is_empty(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + assert provider.get_credentials_schema_by_type(CredentialType.UNAUTHORIZED) == [] + + +def test_builtin_tool_provider_init_raises_when_provider_yaml_missing(): + with patch("core.tools.builtin_tool.provider.load_yaml_file_cached", side_effect=RuntimeError("boom")): + with pytest.raises(ToolProviderNotFoundError, match="can not load provider yaml"): + _ConcreteBuiltinProvider() + + +def test_builtin_tool_provider_handles_empty_credentials_and_oauth(): + provider = object.__new__(_ConcreteBuiltinProvider) + provider.tools = [] + provider.entity = ToolProviderEntity.model_validate( + { + "identity": { + "author": "Dify", + "name": "fake_provider", + "label": {"en_US": "Fake Provider"}, + "description": {"en_US": "Fake description"}, + "icon": "icon.svg", + "tags": None, + }, + "credentials_schema": [], + "oauth_schema": None, + }, + ) + + assert provider.get_oauth_client_schema() == [] + assert provider.get_supported_credential_types() == [] + assert provider.need_credentials is False + assert provider._get_tool_labels() == [] + + +def test_builtin_tool_provider_forked_tool_runtime_is_initialized(): + with ( + patch( + "core.tools.builtin_tool.provider.load_yaml_file_cached", + side_effect=[_provider_yaml(), _tool_yaml()], + ), + patch("core.tools.builtin_tool.provider.listdir", return_value=["tool_a.yaml"]), + patch( + "core.tools.builtin_tool.provider.load_single_subclass_from_source", + return_value=_FakeBuiltinTool, + ), + ): + provider = _ConcreteBuiltinProvider() + + tool = provider.get_tool("tool_a") + assert tool is not None + assert isinstance(tool.runtime, ToolRuntime) + assert tool.runtime.tenant_id == "" + tool.runtime.invoke_from = InvokeFrom.DEBUGGER + assert tool.runtime.invoke_from == InvokeFrom.DEBUGGER diff --git a/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py b/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py new file mode 100644 index 0000000000..62cfb6ce5b --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_builtin_tools_extra.py @@ -0,0 +1,310 @@ +from __future__ import annotations + +import math +from types import SimpleNamespace + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.builtin_tool.providers._positions import BuiltinToolProviderSort +from core.tools.builtin_tool.providers.audio.audio import AudioToolProvider +from core.tools.builtin_tool.providers.audio.tools.asr import ASRTool +from core.tools.builtin_tool.providers.audio.tools.tts import TTSTool +from core.tools.builtin_tool.providers.code.code import CodeToolProvider +from core.tools.builtin_tool.providers.code.tools.simple_code import SimpleCode +from core.tools.builtin_tool.providers.time.time import WikiPediaProvider +from core.tools.builtin_tool.providers.time.tools.current_time import CurrentTimeTool +from core.tools.builtin_tool.providers.time.tools.localtime_to_timestamp import LocaltimeToTimestampTool +from core.tools.builtin_tool.providers.time.tools.timestamp_to_localtime import TimestampToLocaltimeTool +from core.tools.builtin_tool.providers.time.tools.timezone_conversion import TimezoneConversionTool +from core.tools.builtin_tool.providers.time.tools.weekday import WeekdayTool +from core.tools.builtin_tool.providers.webscraper.tools.webscraper import WebscraperTool +from core.tools.builtin_tool.providers.webscraper.webscraper import WebscraperProvider +from core.tools.builtin_tool.tool import BuiltinTool +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.errors import ToolInvokeError +from dify_graph.file.enums import FileType +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey + + +def _build_builtin_tool(tool_cls: type[BuiltinTool]) -> BuiltinTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + return tool_cls(provider="provider-a", entity=entity, runtime=runtime) + + +def _raise_runtime_error(*_args: object, **_kwargs: object) -> None: + raise RuntimeError("boom") + + +def test_current_time_tool(): + current_tool = _build_builtin_tool(CurrentTimeTool) + utc_text = list(current_tool.invoke(user_id="u", tool_parameters={"timezone": "UTC"}))[0].message.text + assert utc_text + + invalid_tz = list(current_tool.invoke(user_id="u", tool_parameters={"timezone": "Invalid/TZ"}))[0].message.text + assert "Invalid timezone" in invalid_tz + + +def test_localtime_to_timestamp_tool(): + localtime_tool = _build_builtin_tool(LocaltimeToTimestampTool) + ts_message = list( + localtime_tool.invoke(user_id="u", tool_parameters={"localtime": "2024-01-01 10:00:00", "timezone": "UTC"}) + )[0].message.text + ts_value = float(ts_message.strip()) + assert math.isfinite(ts_value) + assert ts_value >= 0 + with pytest.raises(ToolInvokeError): + LocaltimeToTimestampTool.localtime_to_timestamp("bad", "%Y-%m-%d %H:%M:%S", "UTC") + + +def test_timestamp_to_localtime_tool(): + to_local_tool = _build_builtin_tool(TimestampToLocaltimeTool) + local_text = list(to_local_tool.invoke(user_id="u", tool_parameters={"timestamp": 1704067200, "timezone": "UTC"}))[ + 0 + ].message.text + assert "2024" in local_text + with pytest.raises(ToolInvokeError): + TimestampToLocaltimeTool.timestamp_to_localtime("bad", "UTC") # type: ignore[arg-type] + + +def test_timezone_conversion_tool(): + timezone_tool = _build_builtin_tool(TimezoneConversionTool) + converted = list( + timezone_tool.invoke( + user_id="u", + tool_parameters={ + "current_time": "2024-01-01 08:00:00", + "current_timezone": "UTC", + "target_timezone": "Asia/Tokyo", + }, + ) + )[0].message.text + assert converted.startswith("2024-01-01") + with pytest.raises(ToolInvokeError): + TimezoneConversionTool.timezone_convert("bad", "UTC", "Asia/Tokyo") + + +def test_weekday_tool(): + weekday_tool = _build_builtin_tool(WeekdayTool) + valid = list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "month": 1, "day": 1}))[0].message.text + assert "January 1, 2024" in valid + invalid = list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "month": 2, "day": 31}))[ + 0 + ].message.text + assert "Invalid date" in invalid + with pytest.raises(ValueError, match="Month is required"): + list(weekday_tool.invoke(user_id="u", tool_parameters={"year": 2024, "day": 1})) + + +def test_simple_code_valid_execution(monkeypatch): + simple_code = _build_builtin_tool(SimpleCode) + + monkeypatch.setattr( + "core.tools.builtin_tool.providers.code.tools.simple_code.CodeExecutor.execute_code", + lambda *a: "ok", + ) + result = list( + simple_code.invoke( + user_id="u", + tool_parameters={"language": "python3", "code": "print(1)"}, + ) + )[0].message.text + assert result == "ok" + + +def test_simple_code_invalid_language(): + simple_code = _build_builtin_tool(SimpleCode) + + with pytest.raises(ValueError, match="Only python3 and javascript"): + list(simple_code.invoke(user_id="u", tool_parameters={"language": "go", "code": "fmt.Println(1)"})) + + +def test_simple_code_execution_error(monkeypatch): + simple_code = _build_builtin_tool(SimpleCode) + + monkeypatch.setattr( + "core.tools.builtin_tool.providers.code.tools.simple_code.CodeExecutor.execute_code", + _raise_runtime_error, + ) + with pytest.raises(ToolInvokeError, match="boom"): + list(simple_code.invoke(user_id="u", tool_parameters={"language": "python3", "code": "print(1)"})) + + +def test_webscraper_empty_url(): + webscraper = _build_builtin_tool(WebscraperTool) + empty = list(webscraper.invoke(user_id="u", tool_parameters={"url": ""}))[0].message.text + assert empty == "Please input url" + + +def test_webscraper_fetch(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr("core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", lambda *a, **k: "page") + full = list(webscraper.invoke(user_id="u", tool_parameters={"url": "https://example.com"}))[0].message.text + assert full == "page" + + +def test_webscraper_summary(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr("core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", lambda *a, **k: "page") + monkeypatch.setattr(webscraper, "summary", lambda user_id, content: "summary") + summarized = list( + webscraper.invoke( + user_id="u", + tool_parameters={"url": "https://example.com", "generate_summary": True}, + ) + )[0].message.text + assert summarized == "summary" + + +def test_webscraper_fetch_error(monkeypatch): + webscraper = _build_builtin_tool(WebscraperTool) + monkeypatch.setattr( + "core.tools.builtin_tool.providers.webscraper.tools.webscraper.get_url", + _raise_runtime_error, + ) + with pytest.raises(ToolInvokeError, match="boom"): + list(webscraper.invoke(user_id="u", tool_parameters={"url": "https://example.com"})) + + +def test_asr_invalid_file(): + asr = _build_builtin_tool(ASRTool) + file_obj = SimpleNamespace(type=FileType.DOCUMENT) + invalid_file = list(asr.invoke(user_id="u", tool_parameters={"audio_file": file_obj}))[0].message.text + assert "not a valid audio file" in invalid_file + + +def test_asr_valid_file_invocation(monkeypatch): + asr = _build_builtin_tool(ASRTool) + model_instance = type("M", (), {"invoke_speech2text": lambda self, file, user: "transcript"})() + model_manager = type("Mgr", (), {"get_model_instance": lambda *a, **k: model_instance})() + monkeypatch.setattr("core.tools.builtin_tool.providers.audio.tools.asr.download", lambda file: b"audio-bytes") + monkeypatch.setattr("core.tools.builtin_tool.providers.audio.tools.asr.ModelManager", lambda: model_manager) + audio_file = SimpleNamespace(type=FileType.AUDIO) + ok = list(asr.invoke(user_id="u", tool_parameters={"audio_file": audio_file, "model": "p#m"}))[0].message.text + assert ok == "transcript" + + +def test_asr_available_models_and_runtime_parameters(monkeypatch): + asr = _build_builtin_tool(ASRTool) + provider_model = type("PM", (), {"provider": "p", "models": [type("Model", (), {"model": "m"})()]})() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.asr.ModelProviderService.get_models_by_model_type", + lambda *a, **k: [provider_model], + ) + assert asr.get_available_models() == [("p", "m")] + assert asr.get_runtime_parameters()[0].name == "model" + + +def test_tts_invoke_returns_messages(monkeypatch): + tts = _build_builtin_tool(TTSTool) + voices_model_instance = type( + "TTSM", + (), + { + "get_tts_voices": lambda self: [{"value": "voice-1"}], + "invoke_tts": lambda self, **kwargs: [b"a", b"b"], + }, + )() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelManager", + lambda: type("M", (), {"get_model_instance": lambda *a, **k: voices_model_instance})(), + ) + messages = list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + assert [m.type for m in messages] == [ToolInvokeMessage.MessageType.TEXT, ToolInvokeMessage.MessageType.BLOB] + + +def test_tts_get_available_models_requires_runtime(): + tts = _build_builtin_tool(TTSTool) + tts.runtime = None + with pytest.raises(ValueError, match="Runtime is required"): + tts.get_available_models() + + +def test_tts_tool_raises_when_runtime_missing(): + tts = _build_builtin_tool(TTSTool) + tts.runtime = None + with pytest.raises(ValueError, match="Runtime is required"): + list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + + +@pytest.mark.parametrize( + "voices", + [[{"value": None}], []], +) +def test_tts_tool_raises_when_voice_unavailable(monkeypatch, voices): + tts = _build_builtin_tool(TTSTool) + tts.runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + model_without_voice = type( + "TTSModelNoVoice", + (), + { + "get_tts_voices": lambda self: voices, + "invoke_tts": lambda self, **kwargs: [b"x"], + }, + )() + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelManager", + lambda: type("Manager", (), {"get_model_instance": lambda *args, **kwargs: model_without_voice})(), + ) + with pytest.raises(ValueError, match="no voice available"): + list(tts.invoke(user_id="u", tool_parameters={"model": "p#m", "text": "hello"})) + + +def test_tts_tool_get_available_models_and_runtime_parameters(monkeypatch): + tts = _build_builtin_tool(TTSTool) + + model_1 = SimpleNamespace( + model="model-a", + model_properties={ModelPropertyKey.VOICES: [{"mode": "v1", "name": "Voice 1"}]}, + ) + model_2 = SimpleNamespace(model="model-b", model_properties={}) + provider_models = [SimpleNamespace(provider="provider-a", models=[model_1, model_2])] + monkeypatch.setattr( + "core.tools.builtin_tool.providers.audio.tools.tts.ModelProviderService.get_models_by_model_type", + lambda *args, **kwargs: provider_models, + ) + + available_models = tts.get_available_models() + assert available_models == [ + ("provider-a", "model-a", [{"mode": "v1", "name": "Voice 1"}]), + ("provider-a", "model-b", []), + ] + + runtime_parameters = tts.get_runtime_parameters() + assert runtime_parameters[0].name == "model" + assert runtime_parameters[0].required is True + assert runtime_parameters[0].options[0].value == "provider-a#model-a" + assert runtime_parameters[1].name == "voice#provider-a#model-a" + + +def test_provider_classes_and_builtin_sort(monkeypatch): + # Use object.__new__ to avoid YAML-loading __init__; only pass-through validation is exercised. + # Ensure pass-through _validate_credentials methods are executed. + AudioToolProvider._validate_credentials(object.__new__(AudioToolProvider), "u", {}) + CodeToolProvider._validate_credentials(object.__new__(CodeToolProvider), "u", {}) + WikiPediaProvider._validate_credentials(object.__new__(WikiPediaProvider), "u", {}) + WebscraperProvider._validate_credentials(object.__new__(WebscraperProvider), "u", {}) + + providers = [SimpleNamespace(name="b"), SimpleNamespace(name="a")] + monkeypatch.setattr(BuiltinToolProviderSort, "_position", {}) + monkeypatch.setattr( + "core.tools.builtin_tool.providers._positions.get_tool_position_map", + lambda _: {"a": 0, "b": 1}, + ) + monkeypatch.setattr( + "core.tools.builtin_tool.providers._positions.sort_by_position_map", + lambda position, values, name_func: sorted(values, key=lambda x: name_func(x)), + ) + sorted_providers = BuiltinToolProviderSort.sort(providers) + assert [p.name for p in sorted_providers] == ["a", "b"] diff --git a/api/tests/unit_tests/core/tools/test_custom_tool.py b/api/tests/unit_tests/core/tools/test_custom_tool.py new file mode 100644 index 0000000000..79b8eaaa87 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_custom_tool.py @@ -0,0 +1,285 @@ +from __future__ import annotations + +from types import SimpleNamespace + +import httpx +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.custom_tool.tool import ApiTool, ParsedResponse +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_bundle import ApiToolBundle +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.errors import ToolInvokeError, ToolParameterValidationError, ToolProviderCredentialValidationError + + +def _build_tool(*, openapi: dict | None = None) -> ApiTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + bundle = ApiToolBundle( + server_url="https://api.example.com/items/{id}", + method="GET", + summary="summary", + operation_id="op-id", + parameters=[], + author="author", + openapi=openapi or {"parameters": []}, + ) + runtime = ToolRuntime( + tenant_id="tenant-1", + invoke_from=InvokeFrom.DEBUGGER, + credentials={"auth_type": "api_key_header", "api_key_value": "k"}, + ) + return ApiTool(entity=entity, api_bundle=bundle, runtime=runtime, provider_id="provider-id") + + +def test_parsed_response_to_string(): + assert ParsedResponse({"a": 1}, True).to_string() == '{"a": 1}' + assert ParsedResponse("ok", False).to_string() == "ok" + + +def test_api_tool_fork_runtime_and_validate_credentials(monkeypatch): + tool = _build_tool() + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, ApiTool) + assert forked.runtime.tenant_id == "tenant-2" + + tool.api_bundle = None # type: ignore[assignment] + with pytest.raises(ValueError, match="api_bundle is required"): + tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + + tool = _build_tool() + assert tool.validate_credentials(credentials={}, parameters={}, format_only=True) == "" + monkeypatch.setattr(tool, "assembling_request", lambda parameters: {"Authorization": "Bearer x"}) + monkeypatch.setattr( + tool, + "do_http_request", + lambda url, method, headers, parameters: httpx.Response(200, json={"ok": True}), + ) + result = tool.validate_credentials(credentials={}, parameters={"a": 1}, format_only=False) + assert result == '{"ok": true}' + + +def test_assembling_request_auth_header_assembly(): + tool = _build_tool() + + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "k" + + tool.runtime.credentials = { + "auth_type": "api_key_header", + "api_key_header_prefix": "bearer", + "api_key_value": "abc", + } + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "Bearer abc" + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_header_prefix": "basic", "api_key_value": "abc"} + headers = tool.assembling_request(parameters={}) + assert headers["Authorization"] == "Basic abc" + + tool.runtime.credentials = {"auth_type": "api_key_query", "api_key_value": "abc"} + assert tool.assembling_request(parameters={}) == {} + + +def test_assembling_request_runtime_auth_errors(): + tool = _build_tool() + + tool.runtime = None + with pytest.raises(ToolProviderCredentialValidationError, match="runtime not initialized"): + tool.assembling_request(parameters={}) + + tool.runtime = ToolRuntime(tenant_id="tenant", credentials={}) + with pytest.raises(ToolProviderCredentialValidationError, match="Missing auth_type"): + tool.assembling_request(parameters={}) + + tool.runtime.credentials = {"auth_type": "api_key_header"} + with pytest.raises(ToolProviderCredentialValidationError, match="Missing api_key_value"): + tool.assembling_request(parameters={}) + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": 123} + with pytest.raises(ToolProviderCredentialValidationError, match="must be a string"): + tool.assembling_request(parameters={}) + + +def test_assembling_request_parameter_validation_and_defaults(): + tool = _build_tool() + + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": "x"} + tool.api_bundle.parameters = [ + SimpleNamespace(required=True, name="required_param", default=None), + ] + with pytest.raises(ToolParameterValidationError, match="Missing required parameter required_param"): + tool.assembling_request(parameters={}) + + tool.api_bundle.parameters = [ + SimpleNamespace(required=True, name="required_param", default="d"), + ] + params = {} + tool.assembling_request(parameters=params) + assert params["required_param"] == "d" + + +def test_validate_and_parse_response_branches(): + tool = _build_tool() + + with pytest.raises(ToolInvokeError, match="status code 500"): + tool.validate_and_parse_response(httpx.Response(500, text="boom")) + + empty = tool.validate_and_parse_response(httpx.Response(200, content=b"")) + assert empty.is_json is False + assert "Empty response from the tool" in str(empty.content) + + json_resp = tool.validate_and_parse_response( + httpx.Response(200, json={"a": 1}, headers={"content-type": "application/json"}) + ) + assert json_resp.is_json is True + assert json_resp.content == {"a": 1} + + non_json_type = tool.validate_and_parse_response( + httpx.Response(200, text='{"a": 1}', headers={"content-type": "text/plain"}) + ) + assert non_json_type.is_json is False + assert non_json_type.content == '{"a": 1}' + + plain_resp = tool.validate_and_parse_response(httpx.Response(200, text="plain")) + assert plain_resp.is_json is False + assert plain_resp.content == "plain" + + with pytest.raises(ValueError, match="Invalid response type"): + tool.validate_and_parse_response("invalid") # type: ignore[arg-type] + + +def test_get_parameter_value_and_type_conversion_helpers(): + tool = _build_tool() + + assert tool.get_parameter_value({"name": "x"}, {"x": 1}) == 1 + assert tool.get_parameter_value({"name": "x", "required": False, "schema": {"default": "d"}}, {}) == "d" + with pytest.raises(ToolParameterValidationError, match="Missing required parameter x"): + tool.get_parameter_value({"name": "x", "required": True}, {}) + + assert tool._convert_body_property_any_of({}, "12", [{"type": "integer"}]) == 12 + assert tool._convert_body_property_any_of({}, "1.5", [{"type": "number"}]) == 1.5 + assert tool._convert_body_property_any_of({}, "true", [{"type": "boolean"}]) is True + assert tool._convert_body_property_any_of({}, "", [{"type": "null"}]) is None + assert tool._convert_body_property_any_of({}, "x", [{"anyOf": [{"type": "string"}]}]) == "x" + + assert tool._convert_body_property_type({"type": "integer"}, "1") == 1 + assert tool._convert_body_property_type({"type": "number"}, "1.2") == 1.2 + assert tool._convert_body_property_type({"type": "string"}, 1) == "1" + assert tool._convert_body_property_type({"type": "boolean"}, 1) is True + assert tool._convert_body_property_type({"type": "null"}, None) is None + assert tool._convert_body_property_type({"type": "object"}, '{"a":1}') == {"a": 1} + assert tool._convert_body_property_type({"type": "array"}, "[1,2]") == [1, 2] + assert tool._convert_body_property_type({"type": "invalid"}, "v") == "v" + assert tool._convert_body_property_type({"anyOf": [{"type": "integer"}]}, "2") == 2 + + +def test_do_http_request_builds_arguments_and_handles_invalid_method(monkeypatch): + openapi = { + "parameters": [ + {"name": "id", "in": "path", "required": True, "schema": {"type": "string"}}, + {"name": "q", "in": "query", "required": False, "schema": {"default": ""}}, + {"name": "X-Extra", "in": "header", "required": False, "schema": {"default": "x"}}, + {"name": "sid", "in": "cookie", "required": False, "schema": {"default": "cookie1"}}, + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "required": ["count"], + "properties": { + "count": {"type": "integer"}, + "name": {"type": "string", "default": "n"}, + }, + } + } + } + }, + } + tool = _build_tool(openapi=openapi) + tool.runtime.credentials = {"auth_type": "api_key_query", "api_key_query_param": "key", "api_key_value": "v"} + headers = {} + captured = {} + + def _fake_get(url, **kwargs): + captured["url"] = url + captured["kwargs"] = kwargs + return httpx.Response(200, text="ok") + + monkeypatch.setattr("core.tools.custom_tool.tool.ssrf_proxy.get", _fake_get) + response = tool.do_http_request( + "https://api.example.com/items/{id}", + "GET", + headers=headers, + parameters={"id": "123", "count": "2", "q": "search"}, + ) + + assert isinstance(response, httpx.Response) + assert captured["url"].endswith("/items/123") + assert captured["kwargs"]["params"]["q"] == "search" + assert captured["kwargs"]["params"]["key"] == "v" + assert captured["kwargs"]["headers"]["Content-Type"] == "application/json" + + invalid_method_tool = _build_tool(openapi={"parameters": []}) + with pytest.raises(ValueError, match="Invalid http method"): + invalid_method_tool.do_http_request("https://api.example.com", "TRACE", headers={}, parameters={}) + + +def test_do_http_request_handles_file_upload_and_invoke_paths(monkeypatch): + openapi = { + "parameters": [], + "requestBody": { + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "properties": {"file": {"format": "binary"}}, + } + } + } + }, + } + tool = _build_tool(openapi=openapi) + tool.runtime.credentials = {"auth_type": "api_key_header", "api_key_value": "k"} + fake_file = SimpleNamespace(filename="a.txt", mime_type="text/plain") + captured = {} + + def _fake_post(url, **kwargs): + captured["headers"] = kwargs["headers"] + captured["files"] = kwargs["files"] + return httpx.Response(200, text="ok") + + monkeypatch.setattr("core.tools.custom_tool.tool.download", lambda _: b"file-bytes") + monkeypatch.setattr("core.tools.custom_tool.tool.ssrf_proxy.post", _fake_post) + response = tool.do_http_request( + "https://api.example.com/upload", + "POST", + headers={}, + parameters={"file": fake_file}, + ) + assert isinstance(response, httpx.Response) + assert "Content-Type" not in captured["headers"] + assert captured["files"][0][0] == "file" + + # _invoke JSON path + monkeypatch.setattr(tool, "assembling_request", lambda parameters: {}) + monkeypatch.setattr(tool, "do_http_request", lambda *args, **kwargs: httpx.Response(200, text='{"a":1}')) + monkeypatch.setattr(tool, "validate_and_parse_response", lambda _: ParsedResponse({"a": 1}, True)) + messages = list(tool.invoke(user_id="u1", tool_parameters={})) + assert [m.type for m in messages] == [ToolInvokeMessage.MessageType.JSON, ToolInvokeMessage.MessageType.TEXT] + + # _invoke text path + monkeypatch.setattr(tool, "validate_and_parse_response", lambda _: ParsedResponse("plain", False)) + messages = list(tool.invoke(user_id="u1", tool_parameters={})) + assert len(messages) == 1 + assert messages[0].message.text == "plain" diff --git a/api/tests/unit_tests/core/tools/test_custom_tool_provider.py b/api/tests/unit_tests/core/tools/test_custom_tool_provider.py new file mode 100644 index 0000000000..93ae217e24 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_custom_tool_provider.py @@ -0,0 +1,75 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.tools.custom_tool.provider import ApiToolProviderController +from core.tools.custom_tool.tool import ApiTool +from core.tools.entities.tool_bundle import ApiToolBundle +from core.tools.entities.tool_entities import ApiProviderAuthType, ToolProviderType + + +def _db_provider() -> SimpleNamespace: + bundle = ApiToolBundle( + server_url="https://api.example.com/items", + method="GET", + summary="List items", + operation_id="list_items", + parameters=[], + author="author", + openapi={"parameters": []}, + ) + return SimpleNamespace( + id="provider-id", + tenant_id="tenant-1", + name="provider-a", + description="desc", + icon="icon.svg", + user=SimpleNamespace(name="Alice"), + tools=[bundle], + ) + + +def test_api_tool_provider_from_db_and_parse_tool_bundle(): + controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.API_KEY_HEADER) + assert controller.provider_type == ToolProviderType.API + assert any(c.name == "api_key_value" for c in controller.entity.credentials_schema) + + tool = controller._parse_tool_bundle(_db_provider().tools[0]) + assert isinstance(tool, ApiTool) + assert tool.entity.identity.provider == "provider-id" + + +def test_api_tool_provider_from_db_query_auth_and_none_auth(): + query_controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.API_KEY_QUERY) + assert any(c.name == "api_key_query_param" for c in query_controller.entity.credentials_schema) + + none_controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.NONE) + assert [c.name for c in none_controller.entity.credentials_schema] == ["auth_type"] + + +def test_api_tool_provider_load_get_tools_and_get_tool(): + controller = ApiToolProviderController.from_db(_db_provider(), ApiProviderAuthType.NONE) + loaded = controller.load_bundled_tools(_db_provider().tools) + assert len(loaded) == 1 + + assert isinstance(controller.get_tool("list_items"), ApiTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + # Return cached tools without querying database. + cached = controller.get_tools("tenant-1") + assert len(cached) == 1 + + # Force DB fetch branch. + controller.tools = [] + provider_with_tools = _db_provider() + with patch("core.tools.custom_tool.provider.db") as mock_db: + scalars_result = Mock() + scalars_result.all.return_value = [provider_with_tools] + mock_db.session.scalars.return_value = scalars_result + tools = controller.get_tools("tenant-1") + assert len(tools) == 1 diff --git a/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py b/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py new file mode 100644 index 0000000000..23c0be9487 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_dataset_retriever_tool.py @@ -0,0 +1,145 @@ +"""Unit tests for DatasetRetrieverTool behavior and retrieval wiring.""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import Mock, patch + +from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.utils.dataset_retriever_tool import DatasetRetrieverTool + + +def _retrieve_config() -> DatasetRetrieveConfigEntity: + return DatasetRetrieveConfigEntity(retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE) + + +def test_get_dataset_tools_returns_empty_for_empty_dataset_ids() -> None: + # Arrange + retrieve_config = _retrieve_config() + + # Act + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=[], + retrieve_config=retrieve_config, + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + + # Assert + assert tools == [] + + +def test_get_dataset_tools_returns_empty_for_missing_retrieve_config() -> None: + # Arrange + dataset_ids = ["d1"] + + # Act + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=dataset_ids, + retrieve_config=None, # type: ignore[arg-type] + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + + # Assert + assert tools == [] + + +def test_get_dataset_tools_builds_tool_and_restores_strategy() -> None: + # Arrange + retrieve_config = _retrieve_config() + retrieval_tool = SimpleNamespace(name="dataset_tool", description="desc", run=lambda query: f"result:{query}") + feature = Mock() + feature.to_dataset_retriever_tool.return_value = [retrieval_tool] + + # Act + with patch("core.tools.utils.dataset_retriever_tool.DatasetRetrieval", return_value=feature): + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=["d1"], + retrieve_config=retrieve_config, + return_resource=True, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={"x": 1}, + ) + + # Assert + assert len(tools) == 1 + assert tools[0].entity.identity.name == "dataset_tool" + assert retrieve_config.retrieve_strategy == DatasetRetrieveConfigEntity.RetrieveStrategy.MULTIPLE + + +def _build_dataset_tool() -> tuple[DatasetRetrieverTool, SimpleNamespace]: + retrieval_tool = SimpleNamespace(name="dataset_tool", description="desc", run=lambda query: f"result:{query}") + feature = Mock() + feature.to_dataset_retriever_tool.return_value = [retrieval_tool] + with patch("core.tools.utils.dataset_retriever_tool.DatasetRetrieval", return_value=feature): + tools = DatasetRetrieverTool.get_dataset_tools( + tenant_id="tenant", + dataset_ids=["d1"], + retrieve_config=_retrieve_config(), + return_resource=False, + invoke_from=InvokeFrom.DEBUGGER, + hit_callback=Mock(), + user_id="u", + inputs={}, + ) + return tools[0], retrieval_tool + + +def test_runtime_parameters_shape() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + params = tool.get_runtime_parameters() + + # Assert + assert len(params) == 1 + assert params[0].name == "query" + + +def test_empty_query_behavior() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + empty_query = list(tool.invoke(user_id="u", tool_parameters={})) + + # Assert + assert len(empty_query) == 1 + assert empty_query[0].message.text == "please input query" + + +def test_query_invocation_result() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + result = list(tool.invoke(user_id="u", tool_parameters={"query": "hello"})) + + # Assert + assert len(result) == 1 + assert result[0].message.text == "result:hello" + + +def test_validate_credentials() -> None: + # Arrange + tool, _ = _build_dataset_tool() + + # Act + result = tool.validate_credentials(credentials={}, parameters={}, format_only=False) + + # Assert + assert result is None diff --git a/api/tests/unit_tests/core/tools/test_mcp_tool.py b/api/tests/unit_tests/core/tools/test_mcp_tool.py new file mode 100644 index 0000000000..eaf054de59 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_mcp_tool.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import base64 +from unittest.mock import patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.mcp.types import ( + BlobResourceContents, + CallToolResult, + EmbeddedResource, + ImageContent, + TextContent, + TextResourceContents, +) +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage, ToolProviderType +from core.tools.errors import ToolInvokeError +from core.tools.mcp_tool.tool import MCPTool + + +def _build_mcp_tool(*, with_output_schema: bool = True) -> MCPTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="remote-tool", + label=I18nObject(en_US="remote-tool"), + provider="provider-id", + ), + parameters=[], + output_schema={"type": "object"} if with_output_schema else {}, + ) + return MCPTool( + entity=entity, + runtime=ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER), + tenant_id="tenant-1", + icon="icon.svg", + server_url="https://mcp.example.com", + provider_id="provider-id", + headers={"x-auth": "token"}, + ) + + +def test_mcp_tool_provider_type_and_fork_runtime(): + tool = _build_mcp_tool() + assert tool.tool_provider_type() == ToolProviderType.MCP + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, MCPTool) + assert forked.runtime.tenant_id == "tenant-2" + assert forked.provider_id == "provider-id" + + +def test_mcp_tool_text_and_json_processing_helpers(): + tool = _build_mcp_tool() + + json_messages = list(tool._process_text_content(TextContent(type="text", text='{"a": 1}'))) + assert json_messages[0].type == ToolInvokeMessage.MessageType.JSON + + plain_messages = list(tool._process_text_content(TextContent(type="text", text="not-json"))) + assert plain_messages[0].type == ToolInvokeMessage.MessageType.TEXT + assert plain_messages[0].message.text == "not-json" + + list_messages = list(tool._process_json_content([{"k": 1}, {"k": 2}])) + assert [m.type for m in list_messages] == [ToolInvokeMessage.MessageType.JSON, ToolInvokeMessage.MessageType.JSON] + + mixed_list_messages = list(tool._process_json_list([{"k": 1}, 2])) + assert len(mixed_list_messages) == 1 + assert mixed_list_messages[0].type == ToolInvokeMessage.MessageType.TEXT + + primitive_messages = list(tool._process_json_content(123)) + assert primitive_messages[0].message.text == "123" + + +def test_mcp_tool_usage_extraction_helpers(): + usage = MCPTool._extract_usage_dict({"usage": {"total_tokens": 9}}) + assert usage == {"total_tokens": 9} + + usage = MCPTool._extract_usage_dict({"metadata": {"usage": {"prompt_tokens": 3, "completion_tokens": 2}}}) + assert usage == {"prompt_tokens": 3, "completion_tokens": 2} + + usage = MCPTool._extract_usage_dict({"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3}) + assert usage == {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3} + + usage = MCPTool._extract_usage_dict({"nested": [{"deep": {"usage": {"total_tokens": 7}}}]}) + assert usage == {"total_tokens": 7} + + result_with_usage = CallToolResult(content=[], _meta={"usage": {"prompt_tokens": 1, "completion_tokens": 2}}) + derived = MCPTool._derive_usage_from_result(result_with_usage) + assert derived.prompt_tokens == 1 + assert derived.completion_tokens == 2 + + result_without_usage = CallToolResult(content=[], _meta=None) + derived = MCPTool._derive_usage_from_result(result_without_usage) + assert derived.total_tokens == 0 + + +def test_mcp_tool_invoke_handles_content_types_and_structured_output(): + tool = _build_mcp_tool() + img_data = base64.b64encode(b"img").decode() + blob_data = base64.b64encode(b"blob").decode() + result = CallToolResult( + content=[ + TextContent(type="text", text='{"a": 1}'), + ImageContent(type="image", data=img_data, mimeType="image/png"), + EmbeddedResource( + type="resource", + resource=TextResourceContents(uri="file:///tmp/a.txt", text="embedded-text"), + ), + EmbeddedResource( + type="resource", + resource=BlobResourceContents( + uri="file:///tmp/b.bin", + blob=blob_data, + mimeType="application/octet-stream", + ), + ), + ], + structuredContent={"x": 1}, + _meta={"usage": {"prompt_tokens": 2, "completion_tokens": 3}}, + ) + + with patch.object(MCPTool, "invoke_remote_mcp_tool", return_value=result): + messages = list(tool.invoke(user_id="user-1", tool_parameters={"a": 1})) + + types = [m.type for m in messages] + assert ToolInvokeMessage.MessageType.JSON in types + assert ToolInvokeMessage.MessageType.BLOB in types + assert ToolInvokeMessage.MessageType.TEXT in types + assert ToolInvokeMessage.MessageType.VARIABLE in types + assert tool.latest_usage.total_tokens == 5 + + +def test_mcp_tool_invoke_raises_for_unsupported_embedded_resource(): + tool = _build_mcp_tool() + # Use model_construct to bypass pydantic validation and force unsupported resource path. + bad_resource = EmbeddedResource.model_construct(type="resource", resource=object()) + result = CallToolResult(content=[bad_resource], _meta=None) + + with patch.object(MCPTool, "invoke_remote_mcp_tool", return_value=result): + with pytest.raises(ToolInvokeError, match="Unsupported embedded resource type"): + list(tool.invoke(user_id="user-1", tool_parameters={})) + + +def test_mcp_tool_handle_none_parameter_filters_empty_values(): + tool = _build_mcp_tool() + cleaned = tool._handle_none_parameter({"a": 1, "b": None, "c": "", "d": " ", "e": "ok"}) + assert cleaned == {"a": 1, "e": "ok"} diff --git a/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py b/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py new file mode 100644 index 0000000000..1060d19ab1 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_mcp_tool_provider.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +from datetime import datetime +from unittest.mock import Mock, patch + +import pytest + +from core.entities.mcp_provider import MCPProviderEntity +from core.tools.entities.tool_entities import ToolProviderType +from core.tools.mcp_tool.provider import MCPToolProviderController +from core.tools.mcp_tool.tool import MCPTool + + +def _build_mcp_entity(*, icon: str = "icon.svg") -> MCPProviderEntity: + now = datetime.now() + return MCPProviderEntity( + id="db-id", + provider_id="provider-id", + name="MCP Provider", + tenant_id="tenant-1", + user_id="user-1", + server_url="https://mcp.example.com", + headers={}, + timeout=30, + sse_read_timeout=300, + authed=False, + credentials={}, + tools=[ + { + "name": "remote-tool", + "description": "remote tool", + "inputSchema": {}, + "outputSchema": {"type": "object"}, + } + ], + icon=icon, + created_at=now, + updated_at=now, + ) + + +def test_mcp_tool_provider_controller_from_entity_and_get_tools(): + entity = _build_mcp_entity() + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + controller = MCPToolProviderController.from_entity(entity) + + assert controller.provider_type == ToolProviderType.MCP + tool = controller.get_tool("remote-tool") + assert isinstance(tool, MCPTool) + assert tool.tenant_id == "tenant-1" + + tools = controller.get_tools() + assert len(tools) == 1 + assert isinstance(tools[0], MCPTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + +def test_mcp_tool_provider_controller_from_entity_requires_icon(): + entity = _build_mcp_entity(icon="") + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + with pytest.raises(ValueError, match="icon is required"): + MCPToolProviderController.from_entity(entity) + + +def test_mcp_tool_provider_controller_from_db_delegates_to_entity(): + entity = _build_mcp_entity() + db_provider = Mock() + db_provider.to_entity.return_value = entity + with patch("core.tools.mcp_tool.provider.ToolTransformService.convert_mcp_schema_to_parameter", return_value=[]): + controller = MCPToolProviderController.from_db(db_provider) + assert isinstance(controller, MCPToolProviderController) diff --git a/api/tests/unit_tests/core/tools/test_plugin_tool.py b/api/tests/unit_tests/core/tools/test_plugin_tool.py new file mode 100644 index 0000000000..4378432a0f --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_plugin_tool.py @@ -0,0 +1,91 @@ +from __future__ import annotations + +from unittest.mock import Mock, patch + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolParameter +from core.tools.plugin_tool.tool import PluginTool + + +def _build_plugin_tool(*, has_runtime_parameters: bool) -> PluginTool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[ + ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ], + has_runtime_parameters=has_runtime_parameters, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER, credentials={"api_key": "x"}) + return PluginTool( + entity=entity, + runtime=runtime, + tenant_id="tenant-1", + icon="icon.svg", + plugin_unique_identifier="plugin-uid", + ) + + +def test_plugin_tool_invoke_and_fork_runtime(): + tool = _build_plugin_tool(has_runtime_parameters=False) + manager = Mock() + manager.invoke.return_value = iter([tool.create_text_message("ok")]) + + with patch("core.tools.plugin_tool.tool.PluginToolManager", return_value=manager): + with patch( + "core.tools.plugin_tool.tool.convert_parameters_to_plugin_format", + return_value={"converted": 1}, + ): + messages = list(tool.invoke(user_id="user-1", tool_parameters={"raw": 1})) + + assert [m.message.text for m in messages] == ["ok"] + manager.invoke.assert_called_once() + assert manager.invoke.call_args.kwargs["tool_parameters"] == {"converted": 1} + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2")) + assert isinstance(forked, PluginTool) + assert forked.runtime.tenant_id == "tenant-2" + assert forked.plugin_unique_identifier == "plugin-uid" + + +def test_plugin_tool_get_runtime_parameters_branches(): + tool = _build_plugin_tool(has_runtime_parameters=False) + assert tool.get_runtime_parameters() == tool.entity.parameters + + tool = _build_plugin_tool(has_runtime_parameters=True) + cached = [ + ToolParameter.get_simple_instance( + name="k", + llm_description="k", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + tool.runtime_parameters = cached + assert tool.get_runtime_parameters() == cached + + tool.runtime_parameters = None + manager = Mock() + returned = [ + ToolParameter.get_simple_instance( + name="dyn", + llm_description="dyn", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + manager.get_runtime_parameters.return_value = returned + with patch("core.tools.plugin_tool.tool.PluginToolManager", return_value=manager): + assert tool.get_runtime_parameters(conversation_id="c1", app_id="a1", message_id="m1") == returned + assert tool.runtime_parameters == returned diff --git a/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py b/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py new file mode 100644 index 0000000000..5ef03cc6ca --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_plugin_tool_provider.py @@ -0,0 +1,89 @@ +from __future__ import annotations + +from unittest.mock import Mock, patch + +import pytest + +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolProviderEntityWithPlugin, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.errors import ToolProviderCredentialValidationError +from core.tools.plugin_tool.provider import PluginToolProviderController +from core.tools.plugin_tool.tool import PluginTool + + +def _build_controller() -> PluginToolProviderController: + tool_entity = ToolEntity( + identity=ToolIdentity( + author="author", + name="tool-a", + label=I18nObject(en_US="tool-a"), + provider="provider-a", + ), + parameters=[], + ) + entity = ToolProviderEntityWithPlugin( + identity=ToolProviderIdentity( + author="author", + name="provider-a", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="Provider"), + ), + credentials_schema=[], + plugin_id="plugin-id", + tools=[tool_entity], + ) + return PluginToolProviderController( + entity=entity, + plugin_id="plugin-id", + plugin_unique_identifier="plugin-uid", + tenant_id="tenant-1", + ) + + +def test_plugin_tool_provider_controller_basic_behaviors(): + controller = _build_controller() + assert controller.provider_type == ToolProviderType.PLUGIN + + tool = controller.get_tool("tool-a") + assert isinstance(tool, PluginTool) + assert tool.runtime.tenant_id == "tenant-1" + + tools = controller.get_tools() + assert len(tools) == 1 + assert isinstance(tools[0], PluginTool) + + with pytest.raises(ValueError, match="not found"): + controller.get_tool("missing") + + +def test_validate_credentials_success(): + controller = _build_controller() + manager = Mock() + manager.validate_provider_credentials.return_value = True + + with patch("core.tools.plugin_tool.provider.PluginToolManager", return_value=manager): + controller._validate_credentials(user_id="u1", credentials={"api_key": "x"}) + + manager.validate_provider_credentials.assert_called_once_with( + tenant_id="tenant-1", + user_id="u1", + provider="provider-a", + credentials={"api_key": "x"}, + ) + + +def test_validate_credentials_failure(): + controller = _build_controller() + manager = Mock() + manager.validate_provider_credentials.return_value = False + + with patch("core.tools.plugin_tool.provider.PluginToolManager", return_value=manager): + with pytest.raises(ToolProviderCredentialValidationError, match="Invalid credentials"): + controller._validate_credentials(user_id="u1", credentials={"api_key": "x"}) diff --git a/api/tests/unit_tests/core/tools/test_signature.py b/api/tests/unit_tests/core/tools/test_signature.py new file mode 100644 index 0000000000..a5242a78c5 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_signature.py @@ -0,0 +1,119 @@ +"""Unit tests for core.tools.signature covering signing and verification invariants.""" + +from __future__ import annotations + +from urllib.parse import parse_qs, urlparse + +import pytest + +from core.tools.signature import sign_tool_file, sign_upload_file, verify_tool_file_signature + + +def test_sign_tool_file_and_verify_roundtrip(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x01" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 120) + + url = sign_tool_file("tool-file-id", ".png", for_external=False) + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + assert parsed.scheme == "https" + assert parsed.netloc == "internal.example.com" + assert parsed.path == "/files/tools/tool-file-id.png" + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, sign) is True + + +def test_sign_tool_file_for_external_uses_files_url(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x04" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 120) + + url = sign_tool_file("tool-file-id", ".png", for_external=True) + parsed = urlparse(url) + + assert parsed.scheme == "https" + assert parsed.netloc == "files.example.com" + assert parsed.path == "/files/tools/tool-file-id.png" + + +def test_verify_tool_file_signature_rejects_invalid_sign(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x02" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 10) + + url = sign_tool_file("tool-file-id", ".txt") + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, "bad-signature") is False + + +def test_verify_tool_file_signature_rejects_expired_signature(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x02" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_ACCESS_TIMEOUT", 10) + + url = sign_tool_file("tool-file-id", ".txt") + parsed = urlparse(url) + query = parse_qs(parsed.query) + timestamp = query["timestamp"][0] + nonce = query["nonce"][0] + sign = query["sign"][0] + + monkeypatch.setattr("core.tools.signature.time.time", lambda: int(timestamp) + 99) + assert verify_tool_file_signature("tool-file-id", timestamp, nonce, sign) is False + + +def test_sign_upload_file_prefers_internal_url(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x03" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + + url = sign_upload_file("upload-id", ".png") + parsed = urlparse(url) + query = parse_qs(parsed.query) + + assert parsed.netloc == "internal.example.com" + assert parsed.path == "/files/upload-id/image-preview" + assert query["timestamp"][0] + assert query["nonce"][0] + assert query["sign"][0] + + +def test_sign_upload_file_uses_files_url_fallback(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr("core.tools.signature.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.signature.os.urandom", lambda _: b"\x05" * 16) + monkeypatch.setattr("core.tools.signature.dify_config.SECRET_KEY", "unit-secret") + monkeypatch.setattr("core.tools.signature.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.signature.dify_config.INTERNAL_FILES_URL", "") + + url = sign_upload_file("upload-id", ".png") + parsed = urlparse(url) + query = parse_qs(parsed.query) + + assert parsed.netloc == "files.example.com" + assert parsed.path == "/files/upload-id/image-preview" + assert query["timestamp"][0] + assert query["nonce"][0] + assert query["sign"][0] diff --git a/api/tests/unit_tests/core/tools/test_tool_engine.py b/api/tests/unit_tests/core/tools/test_tool_engine.py new file mode 100644 index 0000000000..40c107667c --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_engine.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +from collections.abc import Generator +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.tools.__base.tool import Tool +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolInvokeMessageBinary, + ToolInvokeMeta, + ToolParameter, + ToolProviderType, +) +from core.tools.errors import ( + ToolEngineInvokeError, + ToolInvokeError, + ToolParameterValidationError, +) +from core.tools.tool_engine import ToolEngine + + +class _DummyTool(Tool): + result: Any + raise_error: Exception | None + + def __init__(self, entity: ToolEntity, runtime: ToolRuntime): + super().__init__(entity=entity, runtime=runtime) + self.result = [self.create_text_message("ok")] + self.raise_error = None + + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + if self.raise_error: + raise self.raise_error + if isinstance(self.result, list | Generator): + yield from self.result + else: + yield self.result + + +def _build_tool(with_llm_parameter: bool = False) -> _DummyTool: + parameters = [] + if with_llm_parameter: + parameters = [ + ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + ] + entity = ToolEntity( + identity=ToolIdentity(author="author", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=parameters, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER, runtime_parameters={"rt": 1}) + return _DummyTool(entity=entity, runtime=runtime) + + +def test_convert_tool_response_to_str_and_extract_binary_messages(): + tool = _build_tool() + messages = [ + tool.create_text_message("hello"), + tool.create_link_message("https://example.com"), + ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.IMAGE, + message=ToolInvokeMessage.TextMessage(text="https://example.com/a.png"), + meta={"mime_type": "image/png"}, + ), + tool.create_json_message({"a": 1}), + tool.create_json_message({"a": 1}, suppress_output=True), + ] + text = ToolEngine._convert_tool_response_to_str(messages) + assert "hello" in text + assert "result link: https://example.com." in text + assert '"a": 1' in text + + blob_message = ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.BLOB, + message=ToolInvokeMessage.TextMessage(text="https://example.com/blob.bin"), + meta={"mime_type": "application/octet-stream"}, + ) + link_message = ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.LINK, + message=ToolInvokeMessage.TextMessage(text="https://example.com/file.pdf"), + meta={"mime_type": "application/pdf"}, + ) + binaries = list(ToolEngine._extract_tool_response_binary_and_text([messages[2], blob_message, link_message])) + assert [b.mimetype for b in binaries] == ["image/png", "application/octet-stream", "application/pdf"] + + with pytest.raises(ValueError, match="missing meta data"): + list( + ToolEngine._extract_tool_response_binary_and_text( + [ + ToolInvokeMessage( + type=ToolInvokeMessage.MessageType.IMAGE, + message=ToolInvokeMessage.TextMessage(text="x"), + ) + ] + ) + ) + + +def test_create_message_files_and_invoke_generator(): + binaries = [ + ToolInvokeMessageBinary(mimetype="image/png", url="https://example.com/abc.png"), + ToolInvokeMessageBinary(mimetype="audio/wav", url="https://example.com/def.wav"), + ] + created = [] + + def _message_file_factory(**kwargs): + obj = SimpleNamespace(id=f"mf-{len(created) + 1}", **kwargs) + created.append(obj) + return obj + + with patch("core.tools.tool_engine.MessageFile", side_effect=_message_file_factory): + with patch("core.tools.tool_engine.db") as mock_db: + ids = ToolEngine._create_message_files( + tool_messages=binaries, + agent_message=SimpleNamespace(id="msg-1"), + invoke_from=InvokeFrom.DEBUGGER, + user_id="user-1", + ) + + assert ids == ["mf-1", "mf-2"] + assert mock_db.session.add.call_count == 2 + mock_db.session.close.assert_called_once() + + tool = _build_tool() + invoked = list(ToolEngine._invoke(tool, {"a": 1}, user_id="u")) + assert invoked[0].type == ToolInvokeMessage.MessageType.TEXT + assert isinstance(invoked[-1], ToolInvokeMeta) + assert invoked[-1].error is None + + +def test_generic_invoke_success_and_error_paths(): + tool = _build_tool() + callback = Mock() + callback.on_tool_execution.side_effect = lambda **kwargs: kwargs["tool_outputs"] + response = list( + ToolEngine.generic_invoke( + tool=tool, + tool_parameters={"x": 1}, + user_id="u1", + workflow_tool_callback=callback, + workflow_call_depth=0, + conversation_id="c1", + app_id="a1", + message_id="m1", + ) + ) + assert response[0].message.text == "ok" + callback.on_tool_start.assert_called_once() + callback.on_tool_execution.assert_called_once() + + tool.raise_error = RuntimeError("boom") + error_callback = Mock() + error_callback.on_tool_execution.side_effect = lambda **kwargs: list(kwargs["tool_outputs"]) + with pytest.raises(RuntimeError, match="boom"): + list( + ToolEngine.generic_invoke( + tool=tool, + tool_parameters={"x": 1}, + user_id="u1", + workflow_tool_callback=error_callback, + workflow_call_depth=0, + ) + ) + error_callback.on_tool_error.assert_called_once() + + +def test_agent_invoke_success(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + meta = ToolInvokeMeta.empty() + + with patch.object(ToolEngine, "_invoke", return_value=iter([tool.create_text_message("ok"), meta])): + with patch( + "core.tools.tool_engine.ToolFileMessageTransformer.transform_tool_invoke_messages", + side_effect=lambda messages, **kwargs: messages, + ): + with patch.object(ToolEngine, "_extract_tool_response_binary_and_text", return_value=iter([])): + with patch.object(ToolEngine, "_create_message_files", return_value=[]): + result_text, message_files, result_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters="hello", + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert result_text == "ok" + assert message_files == [] + assert result_meta.error is None + callback.on_tool_start.assert_called_once() + callback.on_tool_end.assert_called_once() + + +def test_agent_invoke_param_validation_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + + with patch.object(ToolEngine, "_invoke", side_effect=ToolParameterValidationError("bad-param")): + error_text, files, error_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "tool parameters validation error" in error_text + assert files == [] + assert error_meta.error + + +def test_agent_invoke_engine_meta_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + engine_error = ToolEngineInvokeError(ToolInvokeMeta.error_instance("meta failure")) + + with patch.object(ToolEngine, "_invoke", side_effect=engine_error): + error_text, files, error_meta = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "meta failure" in error_text + assert files == [] + assert error_meta.error == "meta failure" + + +def test_agent_invoke_tool_invoke_error(): + tool = _build_tool(with_llm_parameter=True) + callback = Mock() + message = SimpleNamespace(id="m1", conversation_id="c1") + + with patch.object(ToolEngine, "_invoke", side_effect=ToolInvokeError("invoke boom")): + error_text, files, _ = ToolEngine.agent_invoke( + tool=tool, + tool_parameters={"a": 1}, + user_id="u1", + tenant_id="tenant-1", + message=message, + invoke_from=InvokeFrom.DEBUGGER, + agent_tool_callback=callback, + ) + + assert "tool invoke error" in error_text + assert files == [] diff --git a/api/tests/unit_tests/core/tools/test_tool_file_manager.py b/api/tests/unit_tests/core/tools/test_tool_file_manager.py new file mode 100644 index 0000000000..cca8254dd6 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_file_manager.py @@ -0,0 +1,249 @@ +"""Unit tests for `ToolFileManager` behavior. + +Covers signing/verification, file persistence flows, and retrieval APIs with +mocked storage/session boundaries (httpx, SimpleNamespace, Mock/patch) to +avoid real IO. +""" + +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import httpx +import pytest + +from core.tools.tool_file_manager import ToolFileManager + + +def _setup_tool_file_signing(monkeypatch: pytest.MonkeyPatch) -> dict[str, str]: + monkeypatch.setattr("core.tools.tool_file_manager.time.time", lambda: 1700000000) + monkeypatch.setattr("core.tools.tool_file_manager.os.urandom", lambda _: b"\x01" * 16) + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.SECRET_KEY", "secret") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_URL", "https://files.example.com") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.INTERNAL_FILES_URL", "https://internal.example.com") + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_ACCESS_TIMEOUT", 100) + + url = ToolFileManager.sign_file("tf-1", ".png") + return dict(part.split("=", 1) for part in url.split("?", 1)[1].split("&")) + + +def _patch_session_factory(session: Mock): + session_cm = MagicMock() + session_cm.__enter__.return_value = session + session_cm.__exit__.return_value = False + return patch("core.tools.tool_file_manager.session_factory.create_session", return_value=session_cm) + + +def test_tool_file_manager_sign_verify_valid(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + url = ToolFileManager.sign_file("tf-1", ".png") + assert "/files/tools/tf-1.png" in url + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], query["sign"]) is True + + +def test_tool_file_manager_sign_verify_bad_signature(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], "bad") is False + + +def test_tool_file_manager_sign_verify_expired_timestamp(monkeypatch: pytest.MonkeyPatch) -> None: + query = _setup_tool_file_signing(monkeypatch) + monkeypatch.setattr("core.tools.tool_file_manager.dify_config.FILES_ACCESS_TIMEOUT", 0) + monkeypatch.setattr("core.tools.tool_file_manager.time.time", lambda: 1700000100) + + assert ToolFileManager.verify_file("tf-1", query["timestamp"], query["nonce"], query["sign"]) is False + + +def test_create_file_by_raw_stores_file_and_persists_record() -> None: + manager = ToolFileManager() + session = Mock() + session.refresh.side_effect = lambda model: setattr(model, "id", "tf-1") + + def tool_file_factory(**kwargs): + return SimpleNamespace(**kwargs) + + with ( + patch("core.tools.tool_file_manager.storage") as storage, + patch("core.tools.tool_file_manager.ToolFile", side_effect=tool_file_factory), + patch("core.tools.tool_file_manager.guess_extension", return_value=".txt"), + patch("core.tools.tool_file_manager.uuid4", return_value=SimpleNamespace(hex="abc")), + _patch_session_factory(session), + ): + file_model = manager.create_file_by_raw( + user_id="u1", + tenant_id="t1", + conversation_id="c1", + file_binary=b"hello", + mimetype="text/plain", + filename="readme", + ) + + assert file_model.name.endswith(".txt") + storage.save.assert_called_once() + session.add.assert_called_once() + session.commit.assert_called_once() + session.refresh.assert_called_once_with(file_model) + + +def test_create_file_by_url_downloads_and_persists_record() -> None: + manager = ToolFileManager() + response = Mock() + response.content = b"binary" + response.headers = {"Content-Type": "application/octet-stream"} + response.raise_for_status.return_value = None + session = Mock() + + def tool_file_factory(**kwargs): + return SimpleNamespace(**kwargs) + + session.refresh.side_effect = lambda model: setattr(model, "id", "tf-2") + with ( + patch("core.tools.tool_file_manager.storage") as storage, + patch("core.tools.tool_file_manager.ToolFile", side_effect=tool_file_factory), + patch("core.tools.tool_file_manager.uuid4", return_value=SimpleNamespace(hex="def")), + _patch_session_factory(session), + patch("core.tools.tool_file_manager.ssrf_proxy.get", return_value=response), + ): + file_model = manager.create_file_by_url("u1", "t1", "https://example.com/f.bin", "c1") + + assert file_model.file_key.startswith("tools/t1/") + storage.save.assert_called_once() + session.add.assert_called_once_with(file_model) + session.commit.assert_called_once() + session.refresh.assert_called_once_with(file_model) + + +def test_create_file_by_url_raises_on_timeout() -> None: + manager = ToolFileManager() + + with patch("core.tools.tool_file_manager.ssrf_proxy.get", side_effect=httpx.TimeoutException("timeout")): + with pytest.raises(ValueError, match="timeout when downloading file"): + manager.create_file_by_url("u1", "t1", "https://example.com/f.bin", "c1") + + +def test_get_file_binary_returns_none_when_not_found() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + session.query.return_value.where.return_value.first.return_value = None + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary("missing") + + # Assert + assert result is None + + +def test_get_file_binary_returns_bytes_when_found() -> None: + # Arrange + manager = ToolFileManager() + tool_file = SimpleNamespace(file_key="k1", mimetype="text/plain") + session = Mock() + session.query.return_value.where.return_value.first.return_value = tool_file + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + storage.load_once.return_value = b"hello" + with _patch_session_factory(session): + result = manager.get_file_binary("id1") + + # Assert + assert result == (b"hello", "text/plain") + + +def test_get_file_binary_by_message_file_id_when_messagefile_missing() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = None + second_query.where.return_value.first.return_value = None + session.query.side_effect = [first_query, second_query] + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result is None + + +def test_get_file_binary_by_message_file_id_when_url_is_none() -> None: + # Arrange + manager = ToolFileManager() + message_file = SimpleNamespace(url=None) + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = message_file + second_query.where.return_value.first.return_value = None + session.query.side_effect = [first_query, second_query] + + # Act + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result is None + + +def test_get_file_binary_by_message_file_id_returns_bytes_when_found() -> None: + # Arrange + manager = ToolFileManager() + message_file = SimpleNamespace(url="https://x/files/tools/tool123.png") + tool_file = SimpleNamespace(file_key="k2", mimetype="image/png") + session = Mock() + first_query = Mock() + second_query = Mock() + first_query.where.return_value.first.return_value = message_file + second_query.where.return_value.first.return_value = tool_file + session.query.side_effect = [first_query, second_query] + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + storage.load_once.return_value = b"img" + with _patch_session_factory(session): + result = manager.get_file_binary_by_message_file_id("mf-1") + + # Assert + assert result == (b"img", "image/png") + + +def test_get_file_generator_returns_none_when_toolfile_missing() -> None: + # Arrange + manager = ToolFileManager() + session = Mock() + session.query.return_value.where.return_value.first.return_value = None + + # Act + with _patch_session_factory(session): + stream, tool_file = manager.get_file_generator_by_tool_file_id("tool123") + + # Assert + assert stream is None + assert tool_file is None + + +def test_get_file_generator_returns_stream_when_found() -> None: + # Arrange + manager = ToolFileManager() + tool_file = SimpleNamespace(file_key="k2", mimetype="image/png") + session = Mock() + session.query.return_value.where.return_value.first.return_value = tool_file + + # Act + with patch("core.tools.tool_file_manager.storage") as storage: + stream = iter([b"a", b"b"]) + storage.load_stream.return_value = stream + with ( + _patch_session_factory(session), + patch("core.tools.tool_file_manager.ToolFilePydanticModel.model_validate", return_value="validated-file"), + ): + result_stream, result_file = manager.get_file_generator_by_tool_file_id("tool123") + assert list(result_stream) == [b"a", b"b"] + assert result_file == "validated-file" diff --git a/api/tests/unit_tests/core/tools/test_tool_label_manager.py b/api/tests/unit_tests/core/tools/test_tool_label_manager.py new file mode 100644 index 0000000000..857f4aa178 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_label_manager.py @@ -0,0 +1,92 @@ +from __future__ import annotations + +from types import SimpleNamespace +from typing import Any +from unittest.mock import PropertyMock, patch + +import pytest + +from core.tools.builtin_tool.provider import BuiltinToolProviderController +from core.tools.custom_tool.provider import ApiToolProviderController +from core.tools.tool_label_manager import ToolLabelManager +from core.tools.workflow_as_tool.provider import WorkflowToolProviderController + + +class _ConcreteBuiltinToolProviderController(BuiltinToolProviderController): + def _validate_credentials(self, user_id: str, credentials: dict[str, Any]): + return None + + +def _api_controller(provider_id: str = "api-1") -> ApiToolProviderController: + controller = object.__new__(ApiToolProviderController) + controller.provider_id = provider_id + return controller + + +def _workflow_controller(provider_id: str = "wf-1") -> WorkflowToolProviderController: + controller = object.__new__(WorkflowToolProviderController) + controller.provider_id = provider_id + return controller + + +def test_tool_label_manager_filter_tool_labels(): + filtered = ToolLabelManager.filter_tool_labels(["search", "search", "invalid", "news"]) + assert set(filtered) == {"search", "news"} + assert len(filtered) == 2 + + +def test_tool_label_manager_update_tool_labels_db(): + controller = _api_controller("api-1") + with patch("core.tools.tool_label_manager.db") as mock_db: + delete_query = mock_db.session.query.return_value.where.return_value + delete_query.delete.return_value = None + ToolLabelManager.update_tool_labels(controller, ["search", "search", "invalid"]) + + delete_query.delete.assert_called_once() + # only one valid unique label should be inserted. + assert mock_db.session.add.call_count == 1 + mock_db.session.commit.assert_called_once() + + +def test_tool_label_manager_update_tool_labels_unsupported(): + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.update_tool_labels(object(), ["search"]) # type: ignore[arg-type] + + +def test_tool_label_manager_get_tool_labels_for_builtin_and_db(): + with patch.object( + _ConcreteBuiltinToolProviderController, + "tool_labels", + new_callable=PropertyMock, + return_value=["search", "news"], + ): + builtin = object.__new__(_ConcreteBuiltinToolProviderController) + assert ToolLabelManager.get_tool_labels(builtin) == ["search", "news"] + + api = _api_controller("api-1") + with patch("core.tools.tool_label_manager.db") as mock_db: + mock_db.session.scalars.return_value.all.return_value = ["search", "news"] + labels = ToolLabelManager.get_tool_labels(api) + assert labels == ["search", "news"] + + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.get_tool_labels(object()) # type: ignore[arg-type] + + +def test_tool_label_manager_get_tools_labels_batch(): + assert ToolLabelManager.get_tools_labels([]) == {} + + api = _api_controller("api-1") + wf = _workflow_controller("wf-1") + records = [ + SimpleNamespace(tool_id="api-1", label_name="search"), + SimpleNamespace(tool_id="api-1", label_name="news"), + SimpleNamespace(tool_id="wf-1", label_name="utilities"), + ] + with patch("core.tools.tool_label_manager.db") as mock_db: + mock_db.session.scalars.return_value.all.return_value = records + labels = ToolLabelManager.get_tools_labels([api, wf]) + assert labels == {"api-1": ["search", "news"], "wf-1": ["utilities"]} + + with pytest.raises(ValueError, match="Unsupported tool type"): + ToolLabelManager.get_tools_labels([api, object()]) # type: ignore[list-item] diff --git a/api/tests/unit_tests/core/tools/test_tool_manager.py b/api/tests/unit_tests/core/tools/test_tool_manager.py new file mode 100644 index 0000000000..0f73e22654 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_manager.py @@ -0,0 +1,899 @@ +from __future__ import annotations + +"""Unit tests for ToolManager behavior with mocked providers and collaborators.""" + +import json +import threading +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch + +import pytest + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.plugin.entities.plugin_daemon import CredentialType +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.tool_entities import ( + ApiProviderAuthType, + ToolParameter, + ToolProviderType, +) +from core.tools.errors import ToolProviderNotFoundError +from core.tools.plugin_tool.provider import PluginToolProviderController +from core.tools.tool_manager import ToolManager + + +class _SimpleContextVar: + def __init__(self): + self._is_set = False + self._value: Any = None + + def get(self): + if not self._is_set: + raise LookupError + return self._value + + def set(self, value: Any): + self._value = value + self._is_set = True + + +def _cm(session: Any): + context = Mock() + context.__enter__ = Mock(return_value=session) + context.__exit__ = Mock(return_value=False) + return context + + +def _setup_list_providers_from_api_mocks( + monkeypatch, + *, + session: Mock, + hardcoded_controller: SimpleNamespace, + plugin_controller: PluginToolProviderController, + api_controller: SimpleNamespace, + workflow_controller: SimpleNamespace, +): + mock_db = Mock() + mock_db.engine = object() + monkeypatch.setattr("core.tools.tool_manager.db", mock_db) + monkeypatch.setattr("core.tools.tool_manager.Session", lambda *args, **kwargs: _cm(session)) + monkeypatch.setattr( + ToolManager, + "list_builtin_providers", + Mock(return_value=[hardcoded_controller, plugin_controller]), + ) + monkeypatch.setattr( + ToolManager, + "list_default_builtin_providers", + Mock(return_value=[SimpleNamespace(provider="hardcoded")]), + ) + monkeypatch.setattr("core.tools.tool_manager.is_filtered", lambda *args, **kwargs: False) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.builtin_provider_to_user_provider", + lambda **kwargs: SimpleNamespace(name=kwargs["provider_controller"].entity.identity.name), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.api_provider_to_controller", + Mock(side_effect=[api_controller, RuntimeError("invalid")]), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.api_provider_to_user_provider", + Mock(return_value=SimpleNamespace(name="api-provider")), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_controller", + Mock(side_effect=[workflow_controller, RuntimeError("deleted app")]), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_user_provider", + Mock(return_value=SimpleNamespace(name="workflow-provider")), + ) + monkeypatch.setattr( + "core.tools.tool_manager.ToolLabelManager.get_tools_labels", + Mock(side_effect=[{"api-1": ["search"]}, {"wf-1": ["utility"]}]), + ) + mock_mcp_service = Mock() + mock_mcp_service.list_providers.return_value = [SimpleNamespace(name="mcp-provider")] + monkeypatch.setattr("core.tools.tool_manager.MCPToolManageService", Mock(return_value=mock_mcp_service)) + monkeypatch.setattr("core.tools.tool_manager.BuiltinToolProviderSort.sort", lambda providers: providers) + + +@pytest.fixture(autouse=True) +def _reset_tool_manager_state(): + old_hardcoded = ToolManager._hardcoded_providers.copy() + old_loaded = ToolManager._builtin_providers_loaded + old_labels = ToolManager._builtin_tools_labels.copy() + try: + yield + finally: + ToolManager._hardcoded_providers = old_hardcoded + ToolManager._builtin_providers_loaded = old_loaded + ToolManager._builtin_tools_labels = old_labels + + +def test_get_hardcoded_provider_loads_cache_when_empty(): + provider = Mock() + ToolManager._hardcoded_providers = {} + + def _load(): + ToolManager._hardcoded_providers["weather"] = provider + + with patch.object(ToolManager, "load_hardcoded_providers_cache", side_effect=_load) as mock_load: + assert ToolManager.get_hardcoded_provider("weather") is provider + + mock_load.assert_called_once() + + +def test_get_builtin_provider_returns_plugin_for_missing_hardcoded(): + hardcoded = Mock() + plugin_provider = Mock() + ToolManager._hardcoded_providers = {"time": hardcoded} + + with patch.object(ToolManager, "get_plugin_provider", return_value=plugin_provider): + assert ToolManager.get_builtin_provider("time", "tenant-1") is hardcoded + assert ToolManager.get_builtin_provider("plugin/time", "tenant-1") is plugin_provider + + +def test_get_plugin_provider_uses_context_cache(): + provider_context = _SimpleContextVar() + lock_context = _SimpleContextVar() + lock_context.set(threading.Lock()) + provider_entity = SimpleNamespace(declaration=Mock(), plugin_id="pid", plugin_unique_identifier="uid") + + with patch("core.tools.tool_manager.contexts.plugin_tool_providers", provider_context): + with patch("core.tools.tool_manager.contexts.plugin_tool_providers_lock", lock_context): + with patch("core.tools.tool_manager.PluginToolManager") as mock_manager_cls: + mock_manager_cls.return_value.fetch_tool_provider.return_value = provider_entity + controller = SimpleNamespace(name="controller") + with patch("core.tools.tool_manager.PluginToolProviderController", return_value=controller): + built = ToolManager.get_plugin_provider("provider-a", "tenant-1") + cached = ToolManager.get_plugin_provider("provider-a", "tenant-1") + + assert built is controller + assert cached is controller + mock_manager_cls.return_value.fetch_tool_provider.assert_called_once() + + +def test_get_plugin_provider_raises_when_provider_missing(): + provider_context = _SimpleContextVar() + lock_context = _SimpleContextVar() + lock_context.set(threading.Lock()) + + with patch("core.tools.tool_manager.contexts.plugin_tool_providers", provider_context): + with patch("core.tools.tool_manager.contexts.plugin_tool_providers_lock", lock_context): + with patch("core.tools.tool_manager.PluginToolManager") as mock_manager_cls: + mock_manager_cls.return_value.fetch_tool_provider.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="plugin provider provider-a not found"): + ToolManager.get_plugin_provider("provider-a", "tenant-1") + + +def test_get_tool_runtime_builtin_without_credentials(): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace(get_tool=Mock(return_value=tool), need_credentials=False) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="current_time", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + runtime = tool.fork_tool_runtime.call_args.kwargs["runtime"] + assert runtime.tenant_id == "tenant-1" + assert runtime.credentials == {} + + +def test_get_tool_runtime_builtin_missing_tool_raises(): + controller = SimpleNamespace(get_tool=Mock(return_value=None), need_credentials=False) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + with pytest.raises(ToolProviderNotFoundError, match="builtin tool missing not found"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="missing", + tenant_id="tenant-1", + ) + + +def test_get_tool_runtime_builtin_with_credentials_decrypts_and_forks(): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace( + get_tool=Mock(return_value=tool), + need_credentials=True, + get_credentials_schema_by_type=Mock(return_value=[]), + ) + builtin_provider = SimpleNamespace( + id="cred-1", + credential_type=CredentialType.API_KEY.value, + credentials={"encrypted": "value"}, + expires_at=-1, + user_id="user-1", + ) + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + with patch("core.helper.credential_utils.check_credential_policy_compliance"): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.order_by.return_value.first.return_value = ( + builtin_provider + ) + encrypter = Mock() + encrypter.decrypt.return_value = {"api_key": "secret"} + cache = Mock() + with patch("core.tools.tool_manager.create_provider_encrypter", return_value=(encrypter, cache)): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + runtime = tool.fork_tool_runtime.call_args.kwargs["runtime"] + assert runtime.credentials == {"api_key": "secret"} + assert runtime.credential_type == CredentialType.API_KEY + + +@patch("core.tools.tool_manager.create_provider_encrypter") +@patch("core.plugin.impl.oauth.OAuthHandler") +@patch( + "services.tools.builtin_tools_manage_service.BuiltinToolManageService.get_oauth_client", + return_value={"client_id": "id"}, +) +@patch("core.tools.tool_manager.db") +@patch("core.tools.tool_manager.time.time", return_value=1000) +@patch("core.helper.credential_utils.check_credential_policy_compliance") +def test_get_tool_runtime_builtin_refreshes_expired_oauth_credentials( + mock_check, + mock_time, + mock_db, + mock_get_oauth_client, + mock_oauth_handler_cls, + mock_create_provider_encrypter, +): + tool = Mock() + tool.fork_tool_runtime.return_value = "runtime-tool" + controller = SimpleNamespace( + get_tool=Mock(return_value=tool), + need_credentials=True, + get_credentials_schema_by_type=Mock(return_value=[]), + ) + builtin_provider = SimpleNamespace( + id="cred-1", + credential_type=CredentialType.OAUTH2.value, + credentials={"encrypted": "value"}, + encrypted_credentials=None, + expires_at=1, + user_id="user-1", + ) + refreshed = SimpleNamespace(credentials={"token": "new"}, expires_at=123456) + + mock_db.session.query.return_value.where.return_value.order_by.return_value.first.return_value = builtin_provider + encrypter = Mock() + encrypter.decrypt.return_value = {"token": "old"} + encrypter.encrypt.return_value = {"token": "encrypted"} + cache = Mock() + mock_create_provider_encrypter.return_value = (encrypter, cache) + mock_oauth_handler_cls.return_value.refresh_credentials.return_value = refreshed + + with patch.object(ToolManager, "get_builtin_provider", return_value=controller): + result = ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + ) + + assert result == "runtime-tool" + assert builtin_provider.expires_at == refreshed.expires_at + assert builtin_provider.encrypted_credentials == json.dumps({"token": "encrypted"}) + mock_db.session.commit.assert_called_once() + cache.delete.assert_called_once() + + +def test_get_tool_runtime_builtin_plugin_provider_deleted_raises(): + plugin_controller = object.__new__(PluginToolProviderController) + plugin_controller.entity = SimpleNamespace(credentials_schema=[{"name": "k"}], oauth_schema=None) + plugin_controller.get_tool = Mock(return_value=Mock()) + plugin_controller.get_credentials_schema_by_type = Mock(return_value=[]) + + with patch.object(ToolManager, "get_builtin_provider", return_value=plugin_controller): + with patch("core.tools.tool_manager.is_valid_uuid", return_value=True): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.scalar.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="provider has been deleted"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.BUILT_IN, + provider_id="time", + tool_name="weekday", + tenant_id="tenant-1", + credential_id="uuid-id", + ) + + +def test_get_tool_runtime_api_path(): + api_tool = Mock() + api_tool.fork_tool_runtime.return_value = "api-runtime" + api_provider = Mock() + api_provider.get_tool.return_value = api_tool + + with patch.object(ToolManager, "get_api_provider_controller", return_value=(api_provider, {"c": "enc"})): + encrypter = Mock() + encrypter.decrypt.return_value = {"c": "dec"} + with patch("core.tools.tool_manager.create_tool_provider_encrypter", return_value=(encrypter, Mock())): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tenant_id="tenant-1", + ) + == "api-runtime" + ) + + +def test_get_tool_runtime_workflow_path(): + workflow_provider = SimpleNamespace(tenant_id="tenant-1") + workflow_tool = Mock() + workflow_tool.fork_tool_runtime.return_value = "wf-runtime" + workflow_controller = Mock() + workflow_controller.get_tools.return_value = [workflow_tool] + session = Mock() + session.begin.return_value = _cm(None) + session.scalar.return_value = workflow_provider + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch( + "core.tools.tool_manager.ToolTransformService.workflow_provider_to_controller", + return_value=workflow_controller, + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.WORKFLOW, + provider_id="wf-1", + tool_name="wf", + tenant_id="tenant-1", + ) + == "wf-runtime" + ) + + +def test_get_tool_runtime_plugin_path(): + with patch.object( + ToolManager, + "get_plugin_provider", + return_value=SimpleNamespace(get_tool=lambda _: "plugin-tool"), + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.PLUGIN, + provider_id="plugin-1", + tool_name="p", + tenant_id="tenant-1", + ) + == "plugin-tool" + ) + + +def test_get_tool_runtime_mcp_path(): + with patch.object( + ToolManager, + "get_mcp_provider_controller", + return_value=SimpleNamespace(get_tool=lambda _: "mcp-tool"), + ): + assert ( + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.MCP, + provider_id="mcp-1", + tool_name="m", + tenant_id="tenant-1", + ) + == "mcp-tool" + ) + + +def test_get_tool_runtime_app_not_implemented(): + with pytest.raises(NotImplementedError, match="app provider not implemented"): + ToolManager.get_tool_runtime( + provider_type=ToolProviderType.APP, + provider_id="app", + tool_name="x", + tenant_id="tenant-1", + ) + + +def test_get_agent_runtime_apply_runtime_parameters(): + parameter = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + parameter.form = ToolParameter.ToolParameterForm.FORM + + tool_runtime = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_runtime.get_merged_runtime_parameters = Mock(return_value=[parameter]) + + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={"query": "hello"}): + manager = Mock() + manager.decrypt_tool_parameters.return_value = {"query": "decrypted"} + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=manager): + agent_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_parameters={"query": "hello"}, + credential_id=None, + ) + result = ToolManager.get_agent_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + agent_tool=agent_tool, + invoke_from=InvokeFrom.DEBUGGER, + variable_pool=None, + ) + + assert result is tool_runtime + assert tool_runtime.runtime.runtime_parameters["query"] == "decrypted" + + +def test_get_workflow_runtime_apply_runtime_parameters(): + parameter = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + parameter.form = ToolParameter.ToolParameterForm.FORM + + workflow_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_configurations={"query": "hello"}, + credential_id=None, + ) + tool_runtime2 = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_runtime2.get_merged_runtime_parameters = Mock(return_value=[parameter]) + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime2): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={"query": "workflow"}): + manager = Mock() + manager.decrypt_tool_parameters.return_value = {"query": "workflow-dec"} + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=manager): + workflow_result = ToolManager.get_workflow_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + node_id="node-1", + workflow_tool=workflow_tool, + invoke_from=InvokeFrom.DEBUGGER, + variable_pool=None, + ) + + assert workflow_result is tool_runtime2 + assert tool_runtime2.runtime.runtime_parameters["query"] == "workflow-dec" + + +def test_get_agent_runtime_raises_when_runtime_missing(): + tool_runtime = SimpleNamespace(runtime=None, get_merged_runtime_parameters=lambda: []) + agent_tool = SimpleNamespace( + provider_type=ToolProviderType.API, + provider_id="api-1", + tool_name="search", + tool_parameters={}, + credential_id=None, + ) + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_runtime): + with patch.object(ToolManager, "_convert_tool_parameters_type", return_value={}): + with patch("core.tools.tool_manager.ToolParameterConfigurationManager", return_value=Mock()): + with pytest.raises(ValueError, match="runtime not found"): + ToolManager.get_agent_tool_runtime( + tenant_id="tenant-1", + app_id="app-1", + agent_tool=agent_tool, + ) + + +def test_get_tool_runtime_from_plugin_only_uses_form_parameters(): + form_param = ToolParameter.get_simple_instance( + name="q", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + form_param.form = ToolParameter.ToolParameterForm.FORM + llm_param = ToolParameter.get_simple_instance( + name="llm", + llm_description="llm", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + llm_param.form = ToolParameter.ToolParameterForm.LLM + + tool_entity = SimpleNamespace(runtime=ToolRuntime(tenant_id="tenant-1", runtime_parameters={})) + tool_entity.get_merged_runtime_parameters = Mock(return_value=[form_param, llm_param]) + + with patch.object(ToolManager, "get_tool_runtime", return_value=tool_entity): + result = ToolManager.get_tool_runtime_from_plugin( + tool_type=ToolProviderType.API, + tenant_id="tenant-1", + provider="api-1", + tool_name="search", + tool_parameters={"q": "hello", "llm": "ignore"}, + ) + + assert result is tool_entity + assert tool_entity.runtime.runtime_parameters == {"q": "hello"} + + +def test_hardcoded_provider_icon_success(): + provider = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(icon="icon.svg"))) + with patch.object(ToolManager, "get_hardcoded_provider", return_value=provider): + with patch("core.tools.tool_manager.path.exists", return_value=True): + with patch("core.tools.tool_manager.mimetypes.guess_type", return_value=("image/svg+xml", None)): + icon_path, mime = ToolManager.get_hardcoded_provider_icon("time") + assert icon_path.endswith("icon.svg") + assert mime == "image/svg+xml" + + +def test_hardcoded_provider_icon_missing_raises(): + provider = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(icon="icon.svg"))) + with patch.object(ToolManager, "get_hardcoded_provider", return_value=provider): + with patch("core.tools.tool_manager.path.exists", return_value=False): + with pytest.raises(ToolProviderNotFoundError, match="icon not found"): + ToolManager.get_hardcoded_provider_icon("time") + + +def test_list_hardcoded_providers_cache_hit(): + ToolManager._hardcoded_providers = {"p": Mock()} + ToolManager._builtin_providers_loaded = True + assert list(ToolManager.list_hardcoded_providers()) == list(ToolManager._hardcoded_providers.values()) + + +def test_clear_hardcoded_providers_cache_resets(): + ToolManager._hardcoded_providers = {"p": Mock()} + ToolManager._builtin_providers_loaded = True + ToolManager.clear_hardcoded_providers_cache() + assert ToolManager._hardcoded_providers == {} + assert ToolManager._builtin_providers_loaded is False + + +def test_list_hardcoded_providers_internal_loader(): + good_provider = SimpleNamespace( + entity=SimpleNamespace(identity=SimpleNamespace(name="good")), + get_tools=lambda: [SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="tool-a", label="A")))], + ) + provider_class = Mock(return_value=good_provider) + + with patch("core.tools.tool_manager.listdir", return_value=["good", "bad", "__skip"]): + with patch("core.tools.tool_manager.path.isdir", side_effect=lambda p: "good" in p or "bad" in p): + with patch( + "core.tools.tool_manager.load_single_subclass_from_source", + side_effect=[provider_class, RuntimeError("boom")], + ): + ToolManager._hardcoded_providers = {} + ToolManager._builtin_tools_labels = {} + providers = list(ToolManager._list_hardcoded_providers()) + + assert providers == [good_provider] + assert ToolManager._hardcoded_providers["good"] is good_provider + assert ToolManager._builtin_tools_labels["tool-a"] == "A" + assert ToolManager._builtin_providers_loaded is True + + +def test_get_tool_label_loads_cache_and_handles_missing(): + ToolManager._builtin_tools_labels = {} + + def _load(): + ToolManager._builtin_tools_labels["tool-a"] = "Label A" + + with patch.object(ToolManager, "load_hardcoded_providers_cache", side_effect=_load): + assert ToolManager.get_tool_label("tool-a") == "Label A" + assert ToolManager.get_tool_label("missing") is None + + +def test_list_default_builtin_providers_for_postgres_and_mysql(): + provider_records = [SimpleNamespace(id="id-1"), SimpleNamespace(id="id-2")] + + for scheme in ("postgresql", "mysql"): + session = Mock() + session.execute.return_value.all.return_value = [SimpleNamespace(id="id-1"), SimpleNamespace(id="id-2")] + session.query.return_value.where.return_value.all.return_value = provider_records + + with patch("core.tools.tool_manager.dify_config", SimpleNamespace(SQLALCHEMY_DATABASE_URI_SCHEME=scheme)): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + providers = ToolManager.list_default_builtin_providers("tenant-1") + + assert providers == provider_records + + +def test_list_providers_from_api_covers_builtin_api_workflow_and_mcp(monkeypatch): + hardcoded_controller = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="hardcoded"))) + plugin_controller = object.__new__(PluginToolProviderController) + plugin_controller.entity = SimpleNamespace(identity=SimpleNamespace(name="plugin-provider")) + + api_db_provider_good = SimpleNamespace(id="api-1") + api_db_provider_bad = SimpleNamespace(id="api-2") + api_controller = SimpleNamespace(provider_id="api-1") + + workflow_db_provider_good = SimpleNamespace(id="wf-1") + workflow_db_provider_bad = SimpleNamespace(id="wf-2") + workflow_controller = SimpleNamespace(provider_id="wf-1") + + session = Mock() + session.scalars.side_effect = [ + SimpleNamespace(all=lambda: [api_db_provider_good, api_db_provider_bad]), + SimpleNamespace(all=lambda: [workflow_db_provider_good, workflow_db_provider_bad]), + ] + + _setup_list_providers_from_api_mocks( + monkeypatch, + session=session, + hardcoded_controller=hardcoded_controller, + plugin_controller=plugin_controller, + api_controller=api_controller, + workflow_controller=workflow_controller, + ) + providers = ToolManager.list_providers_from_api(user_id="user-1", tenant_id="tenant-1", typ="") + + names = {provider.name for provider in providers} + assert {"hardcoded", "plugin-provider", "api-provider", "workflow-provider", "mcp-provider"} <= names + + +def test_get_api_provider_controller_returns_controller_and_credentials(): + provider = SimpleNamespace( + id="api-1", + tenant_id="tenant-1", + name="api-provider", + description="desc", + credentials={"auth_type": "api_key_query"}, + credentials_str='{"auth_type": "api_key_query", "api_key_value": "secret"}', + schema_type="openapi", + schema="schema", + tools=[], + icon='{"background": "#000", "content": "A"}', + privacy_policy="privacy", + custom_disclaimer="disclaimer", + ) + db_query = Mock() + db_query.where.return_value.first.return_value = provider + controller = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value = db_query + with patch( + "core.tools.tool_manager.ApiToolProviderController.from_db", return_value=controller + ) as mock_from_db: + built_controller, credentials = ToolManager.get_api_provider_controller("tenant-1", "api-1") + + assert built_controller is controller + assert credentials == provider.credentials + mock_from_db.assert_called_with(provider, ApiProviderAuthType.API_KEY_QUERY) + controller.load_bundled_tools.assert_called_once_with(provider.tools) + + +def test_user_get_api_provider_masks_credentials_and_adds_labels(): + provider = SimpleNamespace( + id="api-1", + tenant_id="tenant-1", + name="api-provider", + description="desc", + credentials={"auth_type": "api_key_query"}, + credentials_str='{"auth_type": "api_key_query", "api_key_value": "secret"}', + schema_type="openapi", + schema="schema", + tools=[], + icon='{"background": "#000", "content": "A"}', + privacy_policy="privacy", + custom_disclaimer="disclaimer", + ) + db_query = Mock() + db_query.where.return_value.first.return_value = provider + controller = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value = db_query + with patch("core.tools.tool_manager.ApiToolProviderController.from_db", return_value=controller): + encrypter = Mock() + encrypter.decrypt.return_value = {"api_key_value": "secret"} + encrypter.mask_plugin_credentials.return_value = {"api_key_value": "***"} + with patch("core.tools.tool_manager.create_tool_provider_encrypter", return_value=(encrypter, Mock())): + with patch("core.tools.tool_manager.ToolLabelManager.get_tool_labels", return_value=["search"]): + user_payload = ToolManager.user_get_api_provider("api-provider", "tenant-1") + + assert user_payload["credentials"]["api_key_value"] == "***" + assert user_payload["labels"] == ["search"] + + +def test_get_api_provider_controller_not_found_raises(): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.return_value = None + with pytest.raises(ToolProviderNotFoundError, match="api provider missing not found"): + ToolManager.get_api_provider_controller("tenant-1", "missing") + + +def test_get_mcp_provider_controller_returns_controller(): + provider_entity = SimpleNamespace(provider_icon={"background": "#111", "content": "M"}) + controller = Mock() + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service = mock_service_cls.return_value + mock_service.get_provider.return_value = provider_entity + with patch("core.tools.tool_manager.MCPToolProviderController.from_db", return_value=controller): + built = ToolManager.get_mcp_provider_controller("tenant-1", "mcp-1") + assert built is controller + + +def test_generate_mcp_tool_icon_url_returns_provider_icon(): + provider_entity = SimpleNamespace(provider_icon={"background": "#111", "content": "M"}) + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service = mock_service_cls.return_value + mock_service.get_provider_entity.return_value = provider_entity + assert ToolManager.generate_mcp_tool_icon_url("tenant-1", "mcp-1") == provider_entity.provider_icon + + +def test_get_mcp_provider_controller_missing_raises(): + session = Mock() + + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.tool_manager.Session", return_value=_cm(session)): + with patch("core.tools.tool_manager.MCPToolManageService") as mock_service_cls: + mock_service_cls.return_value.get_provider.side_effect = ValueError("missing") + with pytest.raises(ToolProviderNotFoundError, match="mcp provider mcp-1 not found"): + ToolManager.get_mcp_provider_controller("tenant-1", "mcp-1") + + +def test_generate_tool_icon_urls_for_builtin_and_plugin(): + with patch("core.tools.tool_manager.dify_config.CONSOLE_API_URL", "https://console.example.com"): + builtin_url = ToolManager.generate_builtin_tool_icon_url("time") + plugin_url = ToolManager.generate_plugin_tool_icon_url("tenant-1", "icon.svg") + + assert builtin_url.endswith("/tool-provider/builtin/time/icon") + assert "/plugin/icon" in plugin_url + + +def test_generate_tool_icon_urls_for_workflow_and_api(): + workflow_provider = SimpleNamespace(icon='{"background": "#222", "content": "W"}') + api_provider = SimpleNamespace(icon='{"background": "#333", "content": "A"}') + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.side_effect = [workflow_provider, api_provider] + assert ToolManager.generate_workflow_tool_icon_url("tenant-1", "wf-1") == {"background": "#222", "content": "W"} + assert ToolManager.generate_api_tool_icon_url("tenant-1", "api-1") == {"background": "#333", "content": "A"} + + +def test_generate_tool_icon_urls_missing_workflow_and_api_use_default(): + with patch("core.tools.tool_manager.db") as mock_db: + mock_db.session.query.return_value.where.return_value.first.return_value = None + assert ToolManager.generate_workflow_tool_icon_url("tenant-1", "missing")["background"] == "#252525" + assert ToolManager.generate_api_tool_icon_url("tenant-1", "missing")["background"] == "#252525" + + +def test_get_tool_icon_for_builtin_provider_variants(): + plugin_provider = object.__new__(PluginToolProviderController) + plugin_provider.entity = SimpleNamespace(identity=SimpleNamespace(icon="plugin.svg")) + + with patch.object(ToolManager, "get_builtin_provider", return_value=plugin_provider): + with patch.object(ToolManager, "generate_plugin_tool_icon_url", return_value="plugin-icon"): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.BUILT_IN, "plugin-provider") == "plugin-icon" + + with patch.object(ToolManager, "get_builtin_provider", return_value=SimpleNamespace()): + with patch.object(ToolManager, "generate_builtin_tool_icon_url", return_value="builtin-icon"): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.BUILT_IN, "time") == "builtin-icon" + + +def test_get_tool_icon_for_api_workflow_and_mcp(): + with patch.object(ToolManager, "generate_api_tool_icon_url", return_value={"background": "#000"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.API, "api-1") == {"background": "#000"} + + with patch.object(ToolManager, "generate_workflow_tool_icon_url", return_value={"background": "#111"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.WORKFLOW, "wf-1") == {"background": "#111"} + + with patch.object(ToolManager, "generate_mcp_tool_icon_url", return_value={"background": "#222"}): + assert ToolManager.get_tool_icon("tenant-1", ToolProviderType.MCP, "mcp-1") == {"background": "#222"} + + +def test_get_tool_icon_plugin_error_returns_default(): + plugin_provider = object.__new__(PluginToolProviderController) + plugin_provider.entity = SimpleNamespace(identity=SimpleNamespace(icon="plugin.svg")) + + with patch.object(ToolManager, "get_plugin_provider", return_value=plugin_provider): + with patch.object(ToolManager, "generate_plugin_tool_icon_url", side_effect=RuntimeError("fail")): + icon = ToolManager.get_tool_icon("tenant-1", ToolProviderType.PLUGIN, "plugin-provider") + assert icon["background"] == "#252525" + + +def test_get_tool_icon_invalid_provider_type_raises(): + with pytest.raises(ValueError, match="provider type"): + ToolManager.get_tool_icon("tenant-1", "invalid", "x") # type: ignore[arg-type] + + +def test_convert_tool_parameters_type_agent_and_workflow_branches(): + file_param = ToolParameter.get_simple_instance( + name="file", + llm_description="file", + typ=ToolParameter.ToolParameterType.FILE, + required=True, + ) + file_param.form = ToolParameter.ToolParameterForm.FORM + + with pytest.raises(ValueError, match="file type parameter file not supported in agent"): + ToolManager._convert_tool_parameters_type( + parameters=[file_param], + variable_pool=None, + tool_configurations={"file": "x"}, + typ="agent", + ) + + text_param = ToolParameter.get_simple_instance( + name="text", + llm_description="text", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + plain = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=None, + tool_configurations={"text": "hello"}, + typ="workflow", + ) + assert plain == {"text": "hello"} + + variable_pool = Mock() + variable_pool.get.return_value = SimpleNamespace(value="from-variable") + variable_pool.convert_template.return_value = SimpleNamespace(text="from-template") + + mixed = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "mixed", "value": "Hello {{name}}"}}, + typ="workflow", + ) + assert mixed == {"text": "from-template"} + + variable = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "variable", "value": ["sys", "query"]}}, + typ="workflow", + ) + assert variable == {"text": "from-variable"} + + +def test_convert_tool_parameters_type_constant_branch(): + text_param = ToolParameter.get_simple_instance( + name="text", + llm_description="text", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + variable_pool = Mock() + + constant = ToolManager._convert_tool_parameters_type( + parameters=[text_param], + variable_pool=variable_pool, + tool_configurations={"text": {"type": "constant", "value": "fixed"}}, + typ="workflow", + ) + + assert constant == {"text": "fixed"} diff --git a/api/tests/unit_tests/core/tools/test_tool_provider_controller.py b/api/tests/unit_tests/core/tools/test_tool_provider_controller.py new file mode 100644 index 0000000000..30b8494c92 --- /dev/null +++ b/api/tests/unit_tests/core/tools/test_tool_provider_controller.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any + +import pytest + +from core.entities.provider_entities import ProviderConfig +from core.tools.__base.tool import Tool +from core.tools.__base.tool_provider import ToolProviderController +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolProviderEntity, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.errors import ToolProviderCredentialValidationError + + +class _DummyTool(Tool): + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + +class _DummyController(ToolProviderController): + def get_tool(self, tool_name: str) -> Tool: + entity = ToolEntity( + identity=ToolIdentity( + author="author", + name=tool_name, + label=I18nObject(en_US=tool_name), + provider="provider", + ), + parameters=[], + ) + return _DummyTool(entity=entity, runtime=ToolRuntime(tenant_id="tenant")) + + +def _provider_identity() -> ToolProviderIdentity: + return ToolProviderIdentity( + author="author", + name="provider", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="Provider"), + ) + + +def test_tool_provider_controller_get_credentials_schema_returns_deep_copy(): + entity = ToolProviderEntity( + identity=_provider_identity(), + credentials_schema=[ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="api_key", required=False)], + ) + controller = _DummyController(entity=entity) + + schema = controller.get_credentials_schema() + schema[0].name = "changed" + + assert controller.entity.credentials_schema[0].name == "api_key" + + +def test_tool_provider_controller_default_provider_type(): + entity = ToolProviderEntity(identity=_provider_identity(), credentials_schema=[]) + controller = _DummyController(entity=entity) + + assert controller.provider_type == ToolProviderType.BUILT_IN + + +def test_validate_credentials_format_covers_required_default_and_type_rules(): + select_options = [ProviderConfig.Option(value="opt-a", label=I18nObject(en_US="A"))] + entity = ToolProviderEntity( + identity=_provider_identity(), + credentials_schema=[ + ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="required_text", required=True), + ProviderConfig(type=ProviderConfig.Type.SECRET_INPUT, name="secret", required=False), + ProviderConfig(type=ProviderConfig.Type.SELECT, name="choice", required=False, options=select_options), + ProviderConfig(type=ProviderConfig.Type.TEXT_INPUT, name="with_default", required=False, default="x"), + ], + ) + controller = _DummyController(entity=entity) + + credentials = {"required_text": "value", "secret": None, "choice": "opt-a"} + controller.validate_credentials_format(credentials) + assert credentials["with_default"] == "x" + + with pytest.raises(ToolProviderCredentialValidationError, match="not found"): + controller.validate_credentials_format({"required_text": "value", "unknown": "v"}) + + with pytest.raises(ToolProviderCredentialValidationError, match="is required"): + controller.validate_credentials_format({"secret": "s"}) + + with pytest.raises(ToolProviderCredentialValidationError, match="should be string"): + controller.validate_credentials_format({"required_text": 123}) # type: ignore[arg-type] + + with pytest.raises(ToolProviderCredentialValidationError, match="should be one of"): + controller.validate_credentials_format({"required_text": "value", "choice": "opt-b"}) diff --git a/api/tests/unit_tests/core/tools/utils/test_configuration.py b/api/tests/unit_tests/core/tools/utils/test_configuration.py new file mode 100644 index 0000000000..5ceaa08893 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_configuration.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +from collections.abc import Generator +from typing import Any +from unittest.mock import patch + +from core.app.entities.app_invoke_entities import InvokeFrom +from core.helper.tool_parameter_cache import ToolParameterCache +from core.tools.__base.tool import Tool +from core.tools.__base.tool_runtime import ToolRuntime +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolParameter, + ToolProviderType, +) +from core.tools.utils.configuration import ToolParameterConfigurationManager + + +class _DummyTool(Tool): + runtime_overrides: list[ToolParameter] + + def __init__(self, entity: ToolEntity, runtime: ToolRuntime, runtime_overrides: list[ToolParameter]): + super().__init__(entity=entity, runtime=runtime) + self.runtime_overrides = runtime_overrides + + def tool_provider_type(self) -> ToolProviderType: + return ToolProviderType.BUILT_IN + + def _invoke( + self, + user_id: str, + tool_parameters: dict[str, Any], + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> Generator[ToolInvokeMessage, None, None]: + yield self.create_text_message("ok") + + def get_runtime_parameters( + self, + conversation_id: str | None = None, + app_id: str | None = None, + message_id: str | None = None, + ) -> list[ToolParameter]: + return self.runtime_overrides + + +def _param( + name: str, + *, + typ: ToolParameter.ToolParameterType, + form: ToolParameter.ToolParameterForm, + required: bool = False, +) -> ToolParameter: + return ToolParameter( + name=name, + label=I18nObject(en_US=name), + placeholder=I18nObject(en_US=""), + human_description=I18nObject(en_US=""), + type=typ, + form=form, + required=required, + default=None, + ) + + +def _build_manager() -> ToolParameterConfigurationManager: + base_params = [ + _param("secret", typ=ToolParameter.ToolParameterType.SECRET_INPUT, form=ToolParameter.ToolParameterForm.FORM), + _param("plain", typ=ToolParameter.ToolParameterType.STRING, form=ToolParameter.ToolParameterForm.FORM), + ] + runtime_overrides = [ + _param("secret", typ=ToolParameter.ToolParameterType.SECRET_INPUT, form=ToolParameter.ToolParameterForm.FORM), + _param("runtime_only", typ=ToolParameter.ToolParameterType.STRING, form=ToolParameter.ToolParameterForm.FORM), + ] + entity = ToolEntity( + identity=ToolIdentity(author="a", name="tool-a", label=I18nObject(en_US="tool-a"), provider="provider-a"), + parameters=base_params, + ) + runtime = ToolRuntime(tenant_id="tenant-1", invoke_from=InvokeFrom.DEBUGGER) + tool = _DummyTool(entity=entity, runtime=runtime, runtime_overrides=runtime_overrides) + return ToolParameterConfigurationManager( + tenant_id="tenant-1", + tool_runtime=tool, + provider_name="provider-a", + provider_type=ToolProviderType.BUILT_IN, + identity_id="ID.1", + ) + + +def test_merge_and_mask_parameters(): + manager = _build_manager() + + masked = manager.mask_tool_parameters({"secret": "abcdefghi", "plain": "x", "runtime_only": "y"}) + assert masked["secret"] == "ab*****hi" + assert masked["plain"] == "x" + assert masked["runtime_only"] == "y" + + +def test_encrypt_tool_parameters(): + manager = _build_manager() + + with patch("core.tools.utils.configuration.encrypter.encrypt_token", return_value="enc"): + encrypted = manager.encrypt_tool_parameters({"secret": "raw", "plain": "x"}) + + assert encrypted["secret"] == "enc" + assert encrypted["plain"] == "x" + + +def test_decrypt_tool_parameters_cache_hit_and_miss(): + manager = _build_manager() + + with ( + patch.object(ToolParameterCache, "get", return_value={"secret": "cached"}), + patch.object(ToolParameterCache, "set") as mock_set, + ): + assert manager.decrypt_tool_parameters({"secret": "enc"}) == {"secret": "cached"} + mock_set.assert_not_called() + + with ( + patch.object(ToolParameterCache, "get", return_value=None), + patch.object(ToolParameterCache, "set") as mock_set, + patch("core.tools.utils.configuration.encrypter.decrypt_token", return_value="dec"), + ): + decrypted = manager.decrypt_tool_parameters({"secret": "enc", "plain": "x"}) + assert decrypted["secret"] == "dec" + mock_set.assert_called_once() + + +def test_delete_tool_parameters_cache(): + manager = _build_manager() + + with patch.object(ToolParameterCache, "delete") as mock_delete: + manager.delete_tool_parameters_cache() + + mock_delete.assert_called_once() + + +def test_configuration_manager_decrypt_suppresses_errors(): + manager = _build_manager() + with ( + patch.object(ToolParameterCache, "get", return_value=None), + patch("core.tools.utils.configuration.encrypter.decrypt_token", side_effect=RuntimeError("boom")), + ): + decrypted = manager.decrypt_tool_parameters({"secret": "enc"}) + # decryption failure is suppressed, original value is retained. + assert decrypted["secret"] == "enc" diff --git a/api/tests/unit_tests/core/tools/utils/test_encryption.py b/api/tests/unit_tests/core/tools/utils/test_encryption.py index 94be0bb573..ce77473dbd 100644 --- a/api/tests/unit_tests/core/tools/utils/test_encryption.py +++ b/api/tests/unit_tests/core/tools/utils/test_encryption.py @@ -1,10 +1,13 @@ import copy -from unittest.mock import patch +from types import SimpleNamespace +from typing import Any +from unittest.mock import Mock, patch import pytest from core.entities.provider_entities import BasicProviderConfig from core.helper.provider_encryption import ProviderConfigEncrypter +from core.tools.utils.encryption import create_tool_provider_encrypter # --------------------------- @@ -13,13 +16,13 @@ from core.helper.provider_encryption import ProviderConfigEncrypter class NoopCache: """Simple cache stub: always returns None, does nothing for set/delete.""" - def get(self): + def get(self) -> Any | None: return None - def set(self, config): + def set(self, config: Any) -> None: pass - def delete(self): + def delete(self) -> None: pass @@ -179,3 +182,35 @@ def test_decrypt_swallow_exception_and_keep_original(encrypter_obj): out = encrypter_obj.decrypt({"password": "ENC_ERR"}) assert out["password"] == "ENC_ERR" + + +def test_create_tool_provider_encrypter_builds_cache_and_encrypter(): + basic_config = BasicProviderConfig(name="key", type=BasicProviderConfig.Type.TEXT_INPUT) + credential_schema_item = SimpleNamespace(to_basic_provider_config=lambda: basic_config) + controller = SimpleNamespace( + provider_type=SimpleNamespace(value="builtin"), + entity=SimpleNamespace(identity=SimpleNamespace(name="provider-a")), + get_credentials_schema=lambda: [credential_schema_item], + ) + + cache_instance = Mock() + encrypter_instance = Mock() + + with patch( + "core.tools.utils.encryption.SingletonProviderCredentialsCache", return_value=cache_instance + ) as cache_cls: + with patch("core.tools.utils.encryption.ProviderConfigEncrypter", return_value=encrypter_instance) as enc_cls: + encrypter, cache = create_tool_provider_encrypter("tenant-1", controller) + + assert encrypter is encrypter_instance + assert cache is cache_instance + cache_cls.assert_called_once_with( + tenant_id="tenant-1", + provider_type="builtin", + provider_identity="provider-a", + ) + enc_cls.assert_called_once_with( + tenant_id="tenant-1", + config=[basic_config], + provider_config_cache=cache_instance, + ) diff --git a/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py b/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py new file mode 100644 index 0000000000..4ce73272bf --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_misc_utils_extra.py @@ -0,0 +1,478 @@ +from __future__ import annotations + +import uuid +from contextlib import nullcontext +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest +from yaml import YAMLError + +from core.app.app_config.entities import DatasetRetrieveConfigEntity +from core.callback_handler.index_tool_callback_handler import DatasetIndexToolCallbackHandler +from core.rag.models.document import Document as RagDocument +from core.tools.utils.dataset_retriever import dataset_multi_retriever_tool as multi_retriever_module +from core.tools.utils.dataset_retriever import dataset_retriever_tool as single_retriever_module +from core.tools.utils.dataset_retriever.dataset_multi_retriever_tool import DatasetMultiRetrieverTool +from core.tools.utils.dataset_retriever.dataset_retriever_tool import DatasetRetrieverTool as SingleDatasetRetrieverTool +from core.tools.utils.text_processing_utils import remove_leading_symbols +from core.tools.utils.uuid_utils import is_valid_uuid +from core.tools.utils.yaml_utils import _load_yaml_file, load_yaml_file_cached + + +def _retrieve_config() -> DatasetRetrieveConfigEntity: + return DatasetRetrieveConfigEntity(retrieve_strategy=DatasetRetrieveConfigEntity.RetrieveStrategy.SINGLE) + + +class _FakeFlaskApp: + def app_context(self): + return nullcontext() + + +class _ImmediateThread: + def __init__(self, target=None, kwargs=None, **_kwargs): + self._target = target + self._kwargs = kwargs or {} + + def start(self): + if self._target is not None: + self._target(**self._kwargs) + + def join(self): + return None + + +class _TestHitCallback(DatasetIndexToolCallbackHandler): + def __init__(self): + self.queries: list[tuple[str, str]] = [] + self.documents: list[RagDocument] | None = None + self.resources = None + + def on_query(self, query: str, dataset_id: str): + self.queries.append((query, dataset_id)) + + def on_tool_end(self, documents: list[RagDocument]): + self.documents = documents + + def return_retriever_resource_info(self, resource): + self.resources = list(resource) + + +def test_remove_leading_symbols_preserves_markdown_link_and_strips_punctuation(): + markdown = "[Example](https://example.com) content" + assert remove_leading_symbols(markdown) == markdown + + assert remove_leading_symbols("...Hello world") == "Hello world" + + +def test_is_valid_uuid_handles_valid_invalid_and_empty_values(): + assert is_valid_uuid(str(uuid.uuid4())) is True + assert is_valid_uuid("not-a-uuid") is False + assert is_valid_uuid("") is False + assert is_valid_uuid(None) is False + + +def test_load_yaml_file_valid(tmp_path): + valid_file = tmp_path / "valid.yaml" + valid_file.write_text("a: 1\nb: two\n", encoding="utf-8") + + loaded = _load_yaml_file(file_path=str(valid_file)) + + assert loaded == {"a": 1, "b": "two"} + + +def test_load_yaml_file_missing(tmp_path): + with pytest.raises(FileNotFoundError): + _load_yaml_file(file_path=str(tmp_path / "missing.yaml")) + + +def test_load_yaml_file_invalid(tmp_path): + invalid_file = tmp_path / "invalid.yaml" + invalid_file.write_text("a: [1, 2\n", encoding="utf-8") + + with pytest.raises(YAMLError): + _load_yaml_file(file_path=str(invalid_file)) + + +def test_load_yaml_file_cached_hits(tmp_path): + valid_file = tmp_path / "valid.yaml" + valid_file.write_text("a: 1\nb: two\n", encoding="utf-8") + + load_yaml_file_cached.cache_clear() + assert load_yaml_file_cached(str(valid_file)) == {"a": 1, "b": "two"} + + assert load_yaml_file_cached(str(valid_file)) == {"a": 1, "b": "two"} + assert load_yaml_file_cached.cache_info().hits == 1 + + +def test_single_dataset_retriever_from_dataset_builds_name_and_description(): + dataset = SimpleNamespace(id="dataset-1", tenant_id="tenant-1", name="Knowledge", description=None) + + tool = SingleDatasetRetrieverTool.from_dataset( + dataset=dataset, + retrieve_config=_retrieve_config(), + return_resource=False, + retriever_from="prod", + inputs={}, + ) + + assert tool.name == "dataset_dataset_1" + assert tool.description == "useful for when you want to answer queries about the Knowledge" + + +def test_single_dataset_retriever_external_run_returns_content_and_resources(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="external", + indexing_technique="high_quality", + retrieval_model={}, + ) + callback = _TestHitCallback() + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = ( + {"dataset-1": ["doc-a"]}, + {"logical_operator": "and"}, + ) + db_session = Mock() + db_session.scalar.return_value = dataset + external_documents = [ + {"content": "first", "metadata": {"document_id": "doc-a"}, "score": 0.9, "title": "Doc A"}, + {"content": "second", "metadata": {"document_id": "doc-b"}, "score": 0.8, "title": "Doc B"}, + ] + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + inputs={"x": 1}, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object( + single_retriever_module.ExternalDatasetService, + "fetch_external_knowledge_retrieval", + return_value=external_documents, + ) as fetch_mock: + result = tool.run(query="hello") + + assert result == "first\nsecond" + assert callback.queries == [("hello", "dataset-1")] + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].dataset_id == "dataset-1" + fetch_mock.assert_called_once() + + +def test_single_dataset_retriever_returns_empty_when_metadata_filter_finds_no_documents(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="internal", + indexing_technique="high_quality", + retrieval_model=None, + ) + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = ({"dataset-1": []}, {"logical_operator": "and"}) + db_session = Mock() + db_session.scalar.return_value = dataset + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=False, + retriever_from="prod", + hit_callbacks=[_TestHitCallback()], + inputs={}, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object(single_retriever_module.RetrievalService, "retrieve") as retrieve_mock: + result = tool.run(query="hello") + + assert result == "" + retrieve_mock.assert_not_called() + + +def test_single_dataset_retriever_non_economy_run_sorts_context_and_resources(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + name="Knowledge Base", + provider="internal", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "score_threshold_enabled": True, + "score_threshold": 0.2, + "reranking_enable": True, + "reranking_model": {"reranking_provider_name": "provider", "reranking_model_name": "model"}, + "reranking_mode": "reranking_model", + "weights": {"vector_setting": {"vector_weight": 0.6}}, + }, + ) + callback = _TestHitCallback() + dataset_retrieval = Mock() + dataset_retrieval.get_metadata_filter_condition.return_value = (None, None) + low_segment = SimpleNamespace( + id="seg-low", + dataset_id="dataset-1", + document_id="doc-low", + content="raw low", + answer="low answer", + hit_count=1, + word_count=10, + position=3, + index_node_hash="hash-low", + get_sign_content=lambda: "signed low", + ) + high_segment = SimpleNamespace( + id="seg-high", + dataset_id="dataset-1", + document_id="doc-high", + content="raw high", + answer=None, + hit_count=9, + word_count=25, + position=1, + index_node_hash="hash-high", + get_sign_content=lambda: "signed high", + ) + records = [ + SimpleNamespace(segment=low_segment, score=0.2, summary="summary low"), + SimpleNamespace(segment=high_segment, score=0.9, summary=None), + ] + documents = [ + RagDocument(page_content="first", metadata={"doc_id": "node-low", "score": 0.2}), + RagDocument(page_content="second", metadata={"doc_id": "node-high", "score": 0.9}), + ] + lookup_doc_low = SimpleNamespace( + id="doc-low", name="Document Low", data_source_type="upload_file", doc_metadata={"lang": "en"} + ) + lookup_doc_high = SimpleNamespace( + id="doc-high", name="Document High", data_source_type="notion", doc_metadata={"lang": "fr"} + ) + db_session = Mock() + db_session.scalar.side_effect = [dataset, lookup_doc_low, lookup_doc_high] + db_session.query.return_value.filter_by.return_value.first.return_value = dataset + + tool = SingleDatasetRetrieverTool( + tenant_id="tenant-1", + dataset_id="dataset-1", + retrieve_config=_retrieve_config(), + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + inputs={}, + top_k=2, + ) + + with patch.object(single_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(single_retriever_module, "DatasetRetrieval", return_value=dataset_retrieval): + with patch.object(single_retriever_module.RetrievalService, "retrieve", return_value=documents): + with patch.object( + single_retriever_module.RetrievalService, + "format_retrieval_documents", + return_value=records, + ): + result = tool.run(query="hello") + + assert result == "signed high\nsummary low\nquestion:signed low answer:low answer" + assert callback.documents == documents + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].segment_id == "seg-high" + assert resource_info[0].hit_count == 9 + assert resource_info[1].summary == "summary low" + assert resource_info[1].content == "question:raw low \nanswer:low answer" + + +def test_multi_dataset_retriever_from_dataset_sets_tool_name(): + tool = DatasetMultiRetrieverTool.from_dataset( + dataset_ids=["dataset-1"], + tenant_id="tenant-1", + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + ) + + assert tool.name == "dataset_tenant_1" + + +def test_multi_dataset_retriever_retriever_returns_early_when_dataset_is_missing(): + callback = _TestHitCallback() + all_documents: list[RagDocument] = [] + db_session = Mock() + db_session.scalar.return_value = None + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + ) + + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(multi_retriever_module.RetrievalService, "retrieve") as retrieve_mock: + result = tool._retriever( + flask_app=_FakeFlaskApp(), + dataset_id="dataset-1", + query="hello", + all_documents=all_documents, + hit_callbacks=[callback], + ) + + assert result == [] + assert all_documents == [] + assert callback.queries == [] + retrieve_mock.assert_not_called() + + +def test_multi_dataset_retriever_retriever_non_economy_uses_retrieval_model(): + dataset = SimpleNamespace( + id="dataset-1", + tenant_id="tenant-1", + indexing_technique="high_quality", + retrieval_model={ + "search_method": "semantic_search", + "top_k": 6, + "score_threshold_enabled": True, + "score_threshold": 0.4, + "reranking_enable": False, + "reranking_mode": None, + "weights": {"balanced": True}, + }, + ) + callback = _TestHitCallback() + documents = [RagDocument(page_content="retrieved", metadata={"doc_id": "node-1", "score": 0.4})] + all_documents: list[RagDocument] = [] + db_session = Mock() + db_session.scalar.return_value = dataset + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=False, + retriever_from="prod", + top_k=2, + ) + + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + with patch.object(multi_retriever_module.RetrievalService, "retrieve", return_value=documents) as retrieve_mock: + tool._retriever( + flask_app=_FakeFlaskApp(), + dataset_id="dataset-1", + query="hello", + all_documents=all_documents, + hit_callbacks=[callback], + ) + + assert all_documents == documents + assert callback.queries == [("hello", "dataset-1")] + retrieve_mock.assert_called_once_with( + retrieval_method="semantic_search", + dataset_id="dataset-1", + query="hello", + top_k=6, + score_threshold=0.4, + reranking_model=None, + reranking_mode="reranking_model", + weights={"balanced": True}, + ) + + +def test_multi_dataset_retriever_run_orders_segments_and_returns_resources(): + callback = _TestHitCallback() + tool = DatasetMultiRetrieverTool( + tenant_id="tenant-1", + dataset_ids=["dataset-1", "dataset-2"], + reranking_provider_name="provider", + reranking_model_name="model", + return_resource=True, + retriever_from="dev", + hit_callbacks=[callback], + top_k=2, + score_threshold=0.1, + ) + first_doc = RagDocument(page_content="first", metadata={"doc_id": "node-2", "score": 0.4}) + second_doc = RagDocument(page_content="second", metadata={"doc_id": "node-1", "score": 0.9}) + + def fake_retriever(**kwargs): + if kwargs["dataset_id"] == "dataset-1": + kwargs["all_documents"].append(first_doc) + else: + kwargs["all_documents"].append(second_doc) + + segment_for_node_2 = SimpleNamespace( + id="seg-2", + dataset_id="dataset-1", + document_id="doc-2", + index_node_id="node-2", + content="raw two", + answer="answer two", + hit_count=2, + word_count=20, + position=2, + index_node_hash="hash-2", + get_sign_content=lambda: "signed two", + ) + segment_for_node_1 = SimpleNamespace( + id="seg-1", + dataset_id="dataset-2", + document_id="doc-1", + index_node_id="node-1", + content="raw one", + answer=None, + hit_count=7, + word_count=30, + position=1, + index_node_hash="hash-1", + get_sign_content=lambda: "signed one", + ) + db_session = Mock() + db_session.scalars.return_value.all.return_value = [segment_for_node_2, segment_for_node_1] + db_session.query.return_value.filter_by.return_value.first.side_effect = [ + SimpleNamespace(id="dataset-2", name="Dataset Two"), + SimpleNamespace(id="dataset-1", name="Dataset One"), + ] + db_session.scalar.side_effect = [ + SimpleNamespace(id="doc-1", name="Doc One", data_source_type="upload_file", doc_metadata={"p": 1}), + SimpleNamespace(id="doc-2", name="Doc Two", data_source_type="notion", doc_metadata={"p": 2}), + ] + model_manager = Mock() + model_manager.get_model_instance.return_value = Mock() + rerank_runner = Mock() + rerank_runner.run.return_value = [second_doc, first_doc] + fake_current_app = SimpleNamespace(_get_current_object=lambda: _FakeFlaskApp()) + + with patch.object(tool, "_retriever", side_effect=fake_retriever) as retriever_mock: + with patch.object(multi_retriever_module, "current_app", fake_current_app): + with patch.object(multi_retriever_module.threading, "Thread", _ImmediateThread): + with patch.object(multi_retriever_module, "ModelManager", return_value=model_manager): + with patch.object(multi_retriever_module, "RerankModelRunner", return_value=rerank_runner): + with patch.object(multi_retriever_module, "db", SimpleNamespace(session=db_session)): + result = tool.run(query="hello") + + assert result == "signed one\nquestion:signed two answer:answer two" + assert retriever_mock.call_count == 2 + assert callback.documents == [second_doc, first_doc] + assert callback.resources is not None + resource_info = callback.resources + assert [item.position for item in resource_info] == [1, 2] + assert resource_info[0].score == 0.9 + assert resource_info[0].content == "raw one" + assert resource_info[1].score == 0.4 + assert resource_info[1].content == "question:raw two \nanswer:answer two" diff --git a/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py b/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py new file mode 100644 index 0000000000..2acae889b2 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_model_invocation_utils.py @@ -0,0 +1,158 @@ +"""Unit tests for ModelInvocationUtils. + +Covers success and error branches for ModelInvocationUtils, including +InvokeModelError and invoke error mappings for InvokeAuthorizationError, +InvokeBadRequestError, InvokeConnectionError, InvokeRateLimitError, and +InvokeServerUnavailableError. Assumes mocked model instances and managers. +""" + +from __future__ import annotations + +from decimal import Decimal +from types import SimpleNamespace +from unittest.mock import Mock, patch + +import pytest + +from core.tools.utils.model_invocation_utils import InvokeModelError, ModelInvocationUtils +from dify_graph.model_runtime.entities.model_entities import ModelPropertyKey +from dify_graph.model_runtime.errors.invoke import ( + InvokeAuthorizationError, + InvokeBadRequestError, + InvokeConnectionError, + InvokeRateLimitError, + InvokeServerUnavailableError, +) + + +def _mock_model_instance(*, schema: dict | None = None) -> SimpleNamespace: + model_type_instance = Mock() + model_type_instance.get_model_schema.return_value = ( + SimpleNamespace(model_properties=schema or {}) if schema is not None else None + ) + return SimpleNamespace( + provider="provider", + model="model-a", + model_name="model-a", + credentials={"api_key": "x"}, + model_type_instance=model_type_instance, + get_llm_num_tokens=lambda prompt_messages: 5, + invoke_llm=Mock(), + ) + + +@pytest.mark.parametrize( + ("model_instance", "expected", "error_match"), + [ + (None, None, "Model not found"), + (_mock_model_instance(schema=None), None, "No model schema found"), + (_mock_model_instance(schema={}), 2048, None), + (_mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 8192}), 8192, None), + ], + ids=[ + "missing-model", + "missing-schema", + "default-context-size", + "schema-context-size", + ], +) +def test_get_max_llm_context_tokens_branches(model_instance, expected, error_match): + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + if error_match: + with pytest.raises(InvokeModelError, match=error_match): + ModelInvocationUtils.get_max_llm_context_tokens("tenant") + else: + assert ModelInvocationUtils.get_max_llm_context_tokens("tenant") == expected + + +def test_calculate_tokens_handles_missing_model(): + manager = Mock() + manager.get_default_model_instance.return_value = None + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with pytest.raises(InvokeModelError, match="Model not found"): + ModelInvocationUtils.calculate_tokens("tenant", []) + + +def test_invoke_success_and_error_mappings(): + model_instance = _mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 2048}) + model_instance.invoke_llm.return_value = SimpleNamespace( + message=SimpleNamespace(content="ok"), + usage=SimpleNamespace( + completion_tokens=7, + completion_unit_price=Decimal("0.1"), + completion_price_unit=Decimal(1), + latency=0.3, + total_price=Decimal("0.7"), + currency="USD", + ), + ) + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + class _ToolModelInvoke: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + db_mock = SimpleNamespace(session=Mock()) + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with patch("core.tools.utils.model_invocation_utils.ToolModelInvoke", _ToolModelInvoke): + with patch("core.tools.utils.model_invocation_utils.db", db_mock): + response = ModelInvocationUtils.invoke( + user_id="u1", + tenant_id="tenant", + tool_type="builtin", + tool_name="tool-a", + prompt_messages=[], + ) + + assert response.message.content == "ok" + assert db_mock.session.add.call_count == 1 + assert db_mock.session.commit.call_count == 2 + + +@pytest.mark.parametrize( + ("exc", "expected"), + [ + (InvokeRateLimitError("rate"), "Invoke rate limit error"), + (InvokeBadRequestError("bad"), "Invoke bad request error"), + (InvokeConnectionError("conn"), "Invoke connection error"), + (InvokeAuthorizationError("auth"), "Invoke authorization error"), + (InvokeServerUnavailableError("down"), "Invoke server unavailable error"), + (RuntimeError("oops"), "Invoke error"), + ], + ids=[ + "rate-limit", + "bad-request", + "connection", + "authorization", + "server-unavailable", + "generic-error", + ], +) +def test_invoke_error_mappings(exc, expected): + model_instance = _mock_model_instance(schema={ModelPropertyKey.CONTEXT_SIZE: 2048}) + model_instance.invoke_llm.side_effect = exc + manager = Mock() + manager.get_default_model_instance.return_value = model_instance + + class _ToolModelInvoke: + def __init__(self, **kwargs): + self.__dict__.update(kwargs) + + db_mock = SimpleNamespace(session=Mock()) + + with patch("core.tools.utils.model_invocation_utils.ModelManager", return_value=manager): + with patch("core.tools.utils.model_invocation_utils.ToolModelInvoke", _ToolModelInvoke): + with patch("core.tools.utils.model_invocation_utils.db", db_mock): + with pytest.raises(InvokeModelError, match=expected): + ModelInvocationUtils.invoke( + user_id="u1", + tenant_id="tenant", + tool_type="builtin", + tool_name="tool-a", + prompt_messages=[], + ) diff --git a/api/tests/unit_tests/core/tools/utils/test_parser.py b/api/tests/unit_tests/core/tools/utils/test_parser.py index f39158aa59..40f91b12a0 100644 --- a/api/tests/unit_tests/core/tools/utils/test_parser.py +++ b/api/tests/unit_tests/core/tools/utils/test_parser.py @@ -1,6 +1,12 @@ +from json.decoder import JSONDecodeError +from unittest.mock import Mock, patch + import pytest from flask import Flask +from yaml import YAMLError +from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter +from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError from core.tools.utils.parser import ApiBasedToolSchemaParser @@ -189,3 +195,225 @@ def test_parse_openapi_to_tool_bundle_default_value_type_casting(app): available_param = params_by_name["available"] assert available_param.type == "boolean" assert available_param.default is True + + +def test_sanitize_default_value_and_type_detection(): + assert ApiBasedToolSchemaParser._sanitize_default_value([]) is None + assert ApiBasedToolSchemaParser._sanitize_default_value({}) is None + assert ApiBasedToolSchemaParser._sanitize_default_value("ok") == "ok" + + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"format": "binary"}) == ToolParameter.ToolParameterType.FILE + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "integer"}) == ToolParameter.ToolParameterType.NUMBER + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"schema": {"type": "boolean"}}) + == ToolParameter.ToolParameterType.BOOLEAN + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "array", "items": {"format": "binary"}}) + == ToolParameter.ToolParameterType.FILES + ) + assert ( + ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "array", "items": {"type": "string"}}) + == ToolParameter.ToolParameterType.ARRAY + ) + assert ApiBasedToolSchemaParser._get_tool_parameter_type({"type": "object"}) is None + + +def test_parse_openapi_to_tool_bundle_server_env_and_refs(app): + openapi = { + "openapi": "3.0.0", + "info": {"title": "API", "version": "1.0.0", "description": "API description"}, + "servers": [ + {"url": "https://dev.example.com", "env": "dev"}, + {"url": "https://prod.example.com", "env": "prod"}, + ], + "paths": { + "/items": { + "post": { + "description": "Create item", + "parameters": [ + {"$ref": "#/components/parameters/token"}, + {"name": "token", "schema": {"type": "string"}}, + ], + "requestBody": { + "content": {"application/json": {"schema": {"$ref": "#/components/schemas/ItemRequest"}}} + }, + } + } + }, + "components": { + "parameters": { + "token": {"name": "token", "required": True, "schema": {"type": "string"}}, + }, + "schemas": { + "ItemRequest": { + "type": "object", + "required": ["age"], + "properties": {"age": {"type": "integer", "description": "Age", "default": 18}}, + } + }, + }, + } + + extra_info: dict = {} + warning: dict = {} + with app.test_request_context(headers={"X-Request-Env": "prod"}): + bundles = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning) + + assert len(bundles) == 1 + assert bundles[0].server_url == "https://prod.example.com/items" + assert warning["duplicated_parameter"].startswith("Parameter token") + assert extra_info["description"] == "API description" + + +def test_parse_openapi_to_tool_bundle_no_server_raises(app): + openapi = {"info": {"title": "x"}, "servers": [], "paths": {}} + with app.test_request_context(): + with pytest.raises(ToolProviderNotFoundError, match="No server found"): + ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi) + + +def test_parse_openapi_yaml_to_tool_bundle_invalid_yaml(app): + with app.test_request_context(): + with pytest.raises(ToolApiSchemaError, match="Invalid openapi yaml"): + ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle("null") + + +def test_parse_swagger_to_openapi_branches(): + with pytest.raises(ToolApiSchemaError, match="No server found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi({"info": {}, "paths": {}}) + + with pytest.raises(ToolApiSchemaError, match="No paths found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi({"servers": [{"url": "https://x"}], "paths": {}}) + + with pytest.raises(ToolApiSchemaError, match="No operationId found"): + ApiBasedToolSchemaParser.parse_swagger_to_openapi( + { + "servers": [{"url": "https://x"}], + "paths": {"/a": {"get": {"summary": "x", "responses": {}}}}, + } + ) + + warning: dict = {"seed": True} + converted = ApiBasedToolSchemaParser.parse_swagger_to_openapi( + { + "servers": [{"url": "https://x"}], + "paths": {"/a": {"get": {"operationId": "getA", "responses": {}}}}, + "definitions": {"A": {"type": "object"}}, + }, + warning=warning, + ) + assert converted["openapi"] == "3.0.0" + assert converted["components"]["schemas"]["A"]["type"] == "object" + assert warning["missing_summary"].startswith("No summary or description found") + + +def test_parse_openai_plugin_json_branches(app): + with app.test_request_context(): + with pytest.raises(ToolProviderNotFoundError, match="Invalid openai plugin json"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle("{bad") + + with pytest.raises(ToolNotSupportedError, match="Only openapi is supported"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "graphql"}}' + ) + + +def test_parse_openai_plugin_json_http_branches(app): + with app.test_request_context(): + response = type("Resp", (), {"status_code": 500, "text": "", "close": Mock()})() + with patch("core.tools.utils.parser.httpx.get", return_value=response): + with pytest.raises(ToolProviderNotFoundError, match="cannot get openapi yaml"): + ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "openapi"}}' + ) + response.close.assert_called_once() + + success_response = type("Resp", (), {"status_code": 200, "text": "openapi: 3.0.0", "close": Mock()})() + with patch("core.tools.utils.parser.httpx.get", return_value=success_response): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle", + return_value=["bundle"], + ) as mock_parse: + bundles = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle( + '{"api": {"url": "https://x", "type": "openapi"}}' + ) + assert bundles == ["bundle"] + mock_parse.assert_called_once() + success_response.close.assert_called_once() + + +def test_auto_parse_json_yaml_failure(): + with patch("core.tools.utils.parser.json_loads", side_effect=JSONDecodeError("bad", "x", 0)): + with patch("core.tools.utils.parser.safe_load", side_effect=YAMLError("bad yaml")): + with pytest.raises(ToolApiSchemaError, match="Invalid api schema, schema is neither json nor yaml"): + ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(":::") + + +def test_auto_parse_openapi_success(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + return_value=["openapi-bundle"], + ): + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["openapi-bundle"] + assert schema_type == ApiProviderSchemaType.OPENAPI + + +def test_auto_parse_openapi_then_swagger(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + loaded_content = { + "openapi": "3.0.0", + "servers": [{"url": "https://x"}], + "info": {"title": "x"}, + "paths": {}, + } + converted_swagger = { + "openapi": "3.0.0", + "servers": [{"url": "https://x"}], + "info": {"title": "x"}, + "paths": {}, + } + + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + side_effect=[ToolApiSchemaError("openapi error"), ["swagger-bundle"]], + ) as mock_parse_openapi: + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_swagger_to_openapi", + return_value=converted_swagger, + ) as mock_parse_swagger: + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["swagger-bundle"] + assert schema_type == ApiProviderSchemaType.SWAGGER + mock_parse_swagger.assert_called_once_with(loaded_content, extra_info={}, warning={}) + assert mock_parse_openapi.call_count == 2 + mock_parse_openapi.assert_any_call(loaded_content, extra_info={}, warning={}) + mock_parse_openapi.assert_any_call(converted_swagger, extra_info={}, warning={}) + + +def test_auto_parse_openapi_swagger_then_plugin(): + openapi_content = '{"openapi": "3.0.0", "servers": [{"url": "https://x"}], "info": {"title": "x"}, "paths": {}}' + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle", + side_effect=ToolApiSchemaError("openapi error"), + ): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_swagger_to_openapi", + side_effect=ToolApiSchemaError("swagger error"), + ): + with patch( + "core.tools.utils.parser.ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle", + return_value=["plugin-bundle"], + ): + bundles, schema_type = ApiBasedToolSchemaParser.auto_parse_to_tool_bundle(openapi_content) + + assert bundles == ["plugin-bundle"] + assert schema_type == ApiProviderSchemaType.OPENAI_PLUGIN diff --git a/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py b/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py new file mode 100644 index 0000000000..5691f33e65 --- /dev/null +++ b/api/tests/unit_tests/core/tools/utils/test_system_oauth_encryption.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +import pytest + +from core.tools.utils import system_oauth_encryption as oauth_encryption +from core.tools.utils.system_oauth_encryption import OAuthEncryptionError, SystemOAuthEncrypter + + +def test_system_oauth_encrypter_roundtrip(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + payload = {"client_id": "cid", "client_secret": "csecret", "grant_type": "authorization_code"} + + encrypted = encrypter.encrypt_oauth_params(payload) + decrypted = encrypter.decrypt_oauth_params(encrypted) + + assert encrypted + assert dict(decrypted) == payload + + +def test_system_oauth_encrypter_decrypt_validates_input(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + + with pytest.raises(ValueError, match="must be a string"): + encrypter.decrypt_oauth_params(123) # type: ignore[arg-type] + + with pytest.raises(ValueError, match="cannot be empty"): + encrypter.decrypt_oauth_params("") + + +def test_system_oauth_encrypter_raises_oauth_error_for_invalid_ciphertext(): + encrypter = SystemOAuthEncrypter(secret_key="test-secret") + + with pytest.raises(OAuthEncryptionError, match="Decryption failed"): + encrypter.decrypt_oauth_params("not-base64") + + +def test_system_oauth_helpers_use_global_cached_instance(monkeypatch): + monkeypatch.setattr(oauth_encryption, "_oauth_encrypter", None) + monkeypatch.setattr("core.tools.utils.system_oauth_encryption.dify_config.SECRET_KEY", "global-secret") + + first = oauth_encryption.get_system_oauth_encrypter() + second = oauth_encryption.get_system_oauth_encrypter() + assert first is second + + encrypted = oauth_encryption.encrypt_system_oauth_params({"k": "v"}) + assert oauth_encryption.decrypt_system_oauth_params(encrypted) == {"k": "v"} + + +def test_create_system_oauth_encrypter_factory(): + encrypter = oauth_encryption.create_system_oauth_encrypter(secret_key="factory-secret") + assert isinstance(encrypter, SystemOAuthEncrypter) diff --git a/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py b/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py index c46e31d90f..dd79b79718 100644 --- a/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py +++ b/api/tests/unit_tests/core/tools/utils/test_workflow_configuration_sync.py @@ -1,7 +1,9 @@ import pytest +from core.tools.entities.tool_entities import ToolParameter, WorkflowToolParameterConfiguration from core.tools.errors import WorkflowToolHumanInputNotSupportedError from core.tools.utils.workflow_configuration_sync import WorkflowToolConfigurationUtils +from dify_graph.variables.input_entities import VariableEntity, VariableEntityType def test_ensure_no_human_input_nodes_passes_for_non_human_input(): @@ -31,3 +33,91 @@ def test_ensure_no_human_input_nodes_raises_for_human_input(): WorkflowToolConfigurationUtils.ensure_no_human_input_nodes(graph) assert exc_info.value.error_code == "workflow_tool_human_input_not_supported" + + +def test_get_workflow_graph_variables_and_outputs(): + graph = { + "nodes": [ + { + "id": "start", + "data": { + "type": "start", + "variables": [ + { + "variable": "query", + "label": "Query", + "type": "text-input", + "required": True, + } + ], + }, + }, + { + "id": "end-1", + "data": { + "type": "end", + "outputs": [ + {"variable": "answer", "value_type": "string", "value_selector": ["n1", "answer"]}, + {"variable": "score", "value_type": "number", "value_selector": ["n1", "score"]}, + ], + }, + }, + { + "id": "end-2", + "data": { + "type": "end", + "outputs": [ + {"variable": "answer", "value_type": "object", "value_selector": ["n2", "answer"]}, + ], + }, + }, + ] + } + + variables = WorkflowToolConfigurationUtils.get_workflow_graph_variables(graph) + assert len(variables) == 1 + assert variables[0].variable == "query" + assert variables[0].type == VariableEntityType.TEXT_INPUT + + outputs = WorkflowToolConfigurationUtils.get_workflow_graph_output(graph) + assert [output.variable for output in outputs] == ["answer", "score"] + assert outputs[0].value_type == "object" + assert outputs[1].value_type == "number" + + no_start = WorkflowToolConfigurationUtils.get_workflow_graph_variables({"nodes": []}) + assert no_start == [] + + +def test_check_is_synced_validation(): + variables = [ + VariableEntity( + variable="query", + label="Query", + type=VariableEntityType.TEXT_INPUT, + required=True, + ) + ] + configs = [ + WorkflowToolParameterConfiguration( + name="query", + description="desc", + form=ToolParameter.ToolParameterForm.FORM, + ) + ] + + WorkflowToolConfigurationUtils.check_is_synced(variables=variables, tool_configurations=configs) + + with pytest.raises(ValueError, match="parameter configuration mismatch"): + WorkflowToolConfigurationUtils.check_is_synced(variables=variables, tool_configurations=[]) + + with pytest.raises(ValueError, match="parameter configuration mismatch"): + WorkflowToolConfigurationUtils.check_is_synced( + variables=variables, + tool_configurations=[ + WorkflowToolParameterConfiguration( + name="other", + description="desc", + form=ToolParameter.ToolParameterForm.FORM, + ) + ], + ) diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py new file mode 100644 index 0000000000..dd140cbb27 --- /dev/null +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_provider.py @@ -0,0 +1,196 @@ +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock, Mock, patch + +import pytest + +from core.tools.entities.common_entities import I18nObject +from core.tools.entities.tool_entities import ( + ToolParameter, + ToolProviderEntity, + ToolProviderIdentity, + ToolProviderType, +) +from core.tools.workflow_as_tool.provider import WorkflowToolProviderController +from dify_graph.variables.input_entities import VariableEntity, VariableEntityType + + +def _controller() -> WorkflowToolProviderController: + entity = ToolProviderEntity( + identity=ToolProviderIdentity( + author="author", + name="wf-provider", + description=I18nObject(en_US="desc"), + icon="icon.svg", + label=I18nObject(en_US="WF"), + ), + credentials_schema=[], + ) + return WorkflowToolProviderController(entity=entity, provider_id="provider-1") + + +def _mock_session_with_begin() -> Mock: + session = Mock() + begin_cm = Mock() + begin_cm.__enter__ = Mock(return_value=None) + begin_cm.__exit__ = Mock(return_value=False) + session.begin.return_value = begin_cm + return session + + +def test_get_db_provider_tool_builds_entity(): + controller = _controller() + session = Mock() + workflow = SimpleNamespace(graph_dict={"nodes": []}, features_dict={}) + session.query.return_value.where.return_value.first.return_value = workflow + app = SimpleNamespace(id="app-1") + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + user_id="user-1", + parameter_configurations=[ + SimpleNamespace(name="country", description="Country", form=ToolParameter.ToolParameterForm.FORM), + SimpleNamespace(name="files", description="files", form=ToolParameter.ToolParameterForm.FORM), + ], + ) + user = SimpleNamespace(name="Alice") + variables = [ + VariableEntity( + variable="country", + label="Country", + description="Country", + type=VariableEntityType.SELECT, + required=True, + options=["US", "IN"], + ) + ] + outputs = [ + SimpleNamespace(variable="json", value_type="string"), + SimpleNamespace(variable="answer", value_type="string"), + ] + + with ( + patch( + "core.tools.workflow_as_tool.provider.WorkflowAppConfigManager.convert_features", + return_value=SimpleNamespace(file_upload=True), + ), + patch( + "core.tools.workflow_as_tool.provider.WorkflowToolConfigurationUtils.get_workflow_graph_variables", + return_value=variables, + ), + patch( + "core.tools.workflow_as_tool.provider.WorkflowToolConfigurationUtils.get_workflow_graph_output", + return_value=outputs, + ), + ): + tool = controller._get_db_provider_tool(db_provider, app, session=session, user=user) + + assert tool.entity.identity.name == "workflow_tool" + # "json" output is reserved for ToolInvokeMessage.VariableMessage and filtered out. + assert tool.entity.output_schema["properties"] == {"answer": {"type": "string", "description": ""}} + assert "json" not in tool.entity.output_schema["properties"] + assert tool.entity.parameters[0].type == ToolParameter.ToolParameterType.SELECT + assert tool.entity.parameters[1].type == ToolParameter.ToolParameterType.SYSTEM_FILES + assert controller.provider_type == ToolProviderType.WORKFLOW + + +def test_get_tool_returns_hit_or_none(): + controller = _controller() + tool = SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="workflow_tool"))) + controller.tools = [tool] + + assert controller.get_tool("workflow_tool") is tool + assert controller.get_tool("missing") is None + + +def test_get_tools_returns_cached(): + controller = _controller() + cached_tools = [SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="wf-cached")))] + controller.tools = cached_tools # type: ignore[assignment] + + assert controller.get_tools("tenant-1") == cached_tools + + +def test_from_db_builds_controller(): + controller = _controller() + + app = SimpleNamespace(id="app-1") + user = SimpleNamespace(name="Alice") + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + user_id="user-1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + parameter_configurations=[], + ) + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = db_provider + session.get.side_effect = [app, user] + fake_cm = MagicMock() + fake_cm.__enter__.return_value = session + fake_cm.__exit__.return_value = False + fake_session_factory = Mock() + fake_session_factory.create_session.return_value = fake_cm + + with patch("core.tools.workflow_as_tool.provider.session_factory", fake_session_factory): + with patch.object( + WorkflowToolProviderController, + "_get_db_provider_tool", + return_value=SimpleNamespace(entity=SimpleNamespace(identity=SimpleNamespace(name="wf"))), + ): + built = WorkflowToolProviderController.from_db(db_provider) + assert isinstance(built, WorkflowToolProviderController) + assert built.tools + + +def test_get_tools_returns_empty_when_provider_missing(): + controller = _controller() + controller.tools = None # type: ignore[assignment] + + with patch("core.tools.workflow_as_tool.provider.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.workflow_as_tool.provider.Session") as session_cls: + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = None + session_cls.return_value.__enter__.return_value = session + + assert controller.get_tools("tenant-1") == [] + + +def test_get_tools_raises_when_app_missing(): + controller = _controller() + controller.tools = None # type: ignore[assignment] + db_provider = SimpleNamespace( + id="provider-1", + app_id="app-1", + version="1", + user_id="user-1", + label="WF Provider", + description="desc", + icon="icon.svg", + name="workflow_tool", + tenant_id="tenant-1", + parameter_configurations=[], + ) + + with patch("core.tools.workflow_as_tool.provider.db") as mock_db: + mock_db.engine = object() + with patch("core.tools.workflow_as_tool.provider.Session") as session_cls: + session = _mock_session_with_begin() + session.query.return_value.where.return_value.first.return_value = db_provider + session.get.return_value = None + session_cls.return_value.__enter__.return_value = session + with pytest.raises(ValueError, match="app not found"): + controller.get_tools("tenant-1") diff --git a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py index 36fdb0218c..cc00f79698 100644 --- a/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py +++ b/api/tests/unit_tests/core/tools/workflow_as_tool/test_tool.py @@ -1,20 +1,85 @@ +"""Unit tests for workflow-as-tool behavior. + +StubSession/StubScalars emulate SQLAlchemy session/scalars with minimal methods +(`scalar`, `scalars`, `expunge`, `commit`, `refresh`, context manager) to keep +database access mocked and predictable in tests. +""" + +import json from types import SimpleNamespace +from typing import Any +from unittest.mock import MagicMock, Mock, patch import pytest from core.app.entities.app_invoke_entities import InvokeFrom from core.tools.__base.tool_runtime import ToolRuntime from core.tools.entities.common_entities import I18nObject -from core.tools.entities.tool_entities import ToolEntity, ToolIdentity, ToolInvokeMessage +from core.tools.entities.tool_entities import ( + ToolEntity, + ToolIdentity, + ToolInvokeMessage, + ToolParameter, + ToolProviderType, +) from core.tools.errors import ToolInvokeError from core.tools.workflow_as_tool.tool import WorkflowTool +from dify_graph.file import FILE_MODEL_IDENTITY -def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_field(monkeypatch: pytest.MonkeyPatch): - """Ensure that WorkflowTool will throw a `ToolInvokeError` exception when - `WorkflowAppGenerator.generate` returns a result with `error` key inside - the `data` element. - """ +class StubScalars: + """Minimal stub for SQLAlchemy scalar results.""" + + _value: Any + + def __init__(self, value: Any) -> None: + self._value = value + + def first(self) -> Any: + return self._value + + +class StubSession: + """Minimal stub for session_factory-created sessions.""" + + scalar_results: list[Any] + scalars_results: list[Any] + expunge_calls: list[object] + + def __init__(self, *, scalar_results: list[Any] | None = None, scalars_results: list[Any] | None = None) -> None: + self.scalar_results = list(scalar_results or []) + self.scalars_results = list(scalars_results or []) + self.expunge_calls: list[object] = [] + + def scalar(self, _stmt: Any) -> Any: + return self.scalar_results.pop(0) + + def scalars(self, _stmt: Any) -> StubScalars: + return StubScalars(self.scalars_results.pop(0)) + + def expunge(self, value: Any) -> None: + self.expunge_calls.append(value) + + def begin(self) -> "StubSession": + return self + + def commit(self) -> None: + pass + + def refresh(self, _value: Any) -> None: + pass + + def close(self) -> None: + pass + + def __enter__(self) -> "StubSession": + return self + + def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> bool: + return False + + +def _build_tool() -> WorkflowTool: entity = ToolEntity( identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), parameters=[], @@ -22,9 +87,9 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel has_runtime_parameters=False, ) runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", + return WorkflowTool( + workflow_app_id="app-1", + workflow_as_tool_id="wf-tool-1", version="1", workflow_entities={}, workflow_call_depth=1, @@ -32,13 +97,19 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel runtime=runtime, ) + +def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_field(monkeypatch: pytest.MonkeyPatch): + """Ensure that WorkflowTool will throw a `ToolInvokeError` exception when + `WorkflowAppGenerator.generate` returns a result with `error` key inside + the `data` element. + """ + tool = _build_tool() + # needs to patch those methods to avoid database access. monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -56,28 +127,12 @@ def test_workflow_tool_should_raise_tool_invoke_error_when_result_has_error_fiel def test_workflow_tool_does_not_use_pause_state_config(monkeypatch: pytest.MonkeyPatch): - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + """Ensure pause_state_config is passed as None.""" + tool = _build_tool() monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) - from unittest.mock import MagicMock, Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -94,22 +149,7 @@ def test_workflow_tool_does_not_use_pause_state_config(monkeypatch: pytest.Monke def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch: pytest.MonkeyPatch): """Test that WorkflowTool should generate variable messages when there are outputs""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() # Mock workflow outputs mock_outputs = {"result": "success", "count": 42, "data": {"key": "value"}} @@ -119,8 +159,6 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -134,10 +172,6 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch # Execute tool invocation messages = list(tool.invoke("test_user", {})) - # Verify generated messages - # Should contain: 3 variable messages + 1 text message + 1 JSON message = 5 messages - assert len(messages) == 5 - # Verify variable messages variable_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.VARIABLE] assert len(variable_messages) == 3 @@ -151,7 +185,7 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch # Verify text message text_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.TEXT] assert len(text_messages) == 1 - assert '{"result": "success", "count": 42, "data": {"key": "value"}}' in text_messages[0].message.text + assert json.loads(text_messages[0].message.text) == mock_outputs # Verify JSON message json_messages = [msg for msg in messages if msg.type == ToolInvokeMessage.MessageType.JSON] @@ -161,30 +195,13 @@ def test_workflow_tool_should_generate_variable_messages_for_outputs(monkeypatch def test_workflow_tool_should_handle_empty_outputs(monkeypatch: pytest.MonkeyPatch): """Test that WorkflowTool should handle empty outputs correctly""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() # needs to patch those methods to avoid database access. monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) # Mock user resolution to avoid database access - from unittest.mock import Mock - mock_user = Mock() monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: mock_user) @@ -217,61 +234,32 @@ def test_workflow_tool_should_handle_empty_outputs(monkeypatch: pytest.MonkeyPat assert json_messages[0].message.json_object == {} -def test_create_variable_message(): - """Test the functionality of creating variable messages""" - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) - - # Test different types of variable values - test_cases = [ +@pytest.mark.parametrize( + ("var_name", "var_value"), + [ ("string_var", "test string"), ("int_var", 42), ("float_var", 3.14), ("bool_var", True), ("list_var", [1, 2, 3]), ("dict_var", {"key": "value"}), - ] + ], +) +def test_create_variable_message(var_name, var_value): + """Create variable messages for multiple value types.""" + tool = _build_tool() - for var_name, var_value in test_cases: - message = tool.create_variable_message(var_name, var_value) + message = tool.create_variable_message(var_name, var_value) - assert message.type == ToolInvokeMessage.MessageType.VARIABLE - assert message.message.variable_name == var_name - assert message.message.variable_value == var_value - assert message.message.stream is False + assert message.type == ToolInvokeMessage.MessageType.VARIABLE + assert message.message.variable_name == var_name + assert message.message.variable_value == var_value + assert message.message.stream is False def test_create_file_message_should_include_file_marker(): - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="test_tool", invoke_from=InvokeFrom.EXPLORE) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + """Ensure file message includes marker and meta payload.""" + tool = _build_tool() file_obj = object() message = tool.create_file_message(file_obj) # type: ignore[arg-type] @@ -284,103 +272,247 @@ def test_create_file_message_should_include_file_marker(): def test_resolve_user_from_database_falls_back_to_end_user(monkeypatch: pytest.MonkeyPatch): """Ensure worker context can resolve EndUser when Account is missing.""" - class StubSession: - def __init__(self, results: list): - self.results = results - - def scalar(self, _stmt): - return self.results.pop(0) - - # SQLAlchemy Session APIs used by code under test - def expunge(self, *_args, **_kwargs): - pass - - def close(self): - pass - - # support `with session_factory.create_session() as session:` - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - self.close() - tenant = SimpleNamespace(id="tenant_id") end_user = SimpleNamespace(id="end_user_id", tenant_id="tenant_id") # Monkeypatch session factory to return our stub session + stub_session = StubSession(scalar_results=[tenant, None, end_user]) monkeypatch.setattr( "core.tools.workflow_as_tool.tool.session_factory.create_session", - lambda: StubSession([tenant, None, end_user]), + lambda: stub_session, ) - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="tenant_id", invoke_from=InvokeFrom.SERVICE_API) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() + tool.runtime.invoke_from = InvokeFrom.SERVICE_API + tool.runtime.tenant_id = "tenant_id" resolved_user = tool._resolve_user_from_database(user_id=end_user.id) assert resolved_user is end_user + assert stub_session.expunge_calls == [end_user] def test_resolve_user_from_database_returns_none_when_no_tenant(monkeypatch: pytest.MonkeyPatch): """Return None if tenant cannot be found in worker context.""" - class StubSession: - def __init__(self, results: list): - self.results = results - - def scalar(self, _stmt): - return self.results.pop(0) - - def expunge(self, *_args, **_kwargs): - pass - - def close(self): - pass - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc, tb): - self.close() - # Monkeypatch session factory to return our stub session with no tenant monkeypatch.setattr( "core.tools.workflow_as_tool.tool.session_factory.create_session", - lambda: StubSession([None]), + lambda: StubSession(scalar_results=[None]), ) - entity = ToolEntity( - identity=ToolIdentity(author="test", name="test tool", label=I18nObject(en_US="test tool"), provider="test"), - parameters=[], - description=None, - has_runtime_parameters=False, - ) - runtime = ToolRuntime(tenant_id="missing_tenant", invoke_from=InvokeFrom.SERVICE_API) - tool = WorkflowTool( - workflow_app_id="", - workflow_as_tool_id="", - version="1", - workflow_entities={}, - workflow_call_depth=1, - entity=entity, - runtime=runtime, - ) + tool = _build_tool() + tool.runtime.invoke_from = InvokeFrom.SERVICE_API + tool.runtime.tenant_id = "missing_tenant" resolved_user = tool._resolve_user_from_database(user_id="any") assert resolved_user is None + + +def test_workflow_tool_provider_type_and_fork_runtime(): + """Verify provider type and forked runtime behavior.""" + tool = _build_tool() + assert tool.tool_provider_type() == ToolProviderType.WORKFLOW + assert tool.latest_usage.total_tokens == 0 + + forked = tool.fork_tool_runtime(ToolRuntime(tenant_id="tenant-2", invoke_from=InvokeFrom.DEBUGGER)) + assert isinstance(forked, WorkflowTool) + assert forked.workflow_app_id == tool.workflow_app_id + assert forked.runtime.tenant_id == "tenant-2" + + +def test_derive_usage_from_top_level_usage_key(): + """Derive usage from top-level usage dict.""" + usage = WorkflowTool._derive_usage_from_result({"usage": {"total_tokens": 12, "total_price": "0.2"}}) + assert usage.total_tokens == 12 + + +def test_derive_usage_from_metadata_usage(): + """Derive usage from metadata usage dict.""" + metadata_usage = WorkflowTool._derive_usage_from_result({"metadata": {"usage": {"total_tokens": 7}}}) + assert metadata_usage.total_tokens == 7 + + +def test_derive_usage_from_totals(): + """Derive usage from top-level totals fields.""" + totals_usage = WorkflowTool._derive_usage_from_result( + {"total_tokens": "9", "total_price": "1.3", "currency": "USD"} + ) + assert totals_usage.total_tokens == 9 + assert str(totals_usage.total_price) == "1.3" + + +def test_derive_usage_from_empty(): + """Default usage values when result is empty.""" + empty_usage = WorkflowTool._derive_usage_from_result({}) + assert empty_usage.total_tokens == 0 + + +def test_extract_usage_from_nested(): + """Extract nested usage dict from result payloads.""" + nested = WorkflowTool._extract_usage_dict({"nested": [{"data": {"usage": {"total_tokens": 3}}}]}) + assert nested == {"total_tokens": 3} + + +def test_invoke_raises_when_user_not_found(monkeypatch: pytest.MonkeyPatch): + """Raise ToolInvokeError when user resolution fails.""" + tool = _build_tool() + monkeypatch.setattr(tool, "_get_app", lambda *args, **kwargs: None) + monkeypatch.setattr(tool, "_get_workflow", lambda *args, **kwargs: None) + monkeypatch.setattr(tool, "_resolve_user", lambda *args, **kwargs: None) + + with pytest.raises(ToolInvokeError, match="User not found"): + list(tool.invoke("missing", {})) + + +def test_resolve_user_from_database_returns_account(monkeypatch: pytest.MonkeyPatch): + """Resolve Account and set tenant in worker context.""" + tenant = SimpleNamespace(id="tenant_id") + account = SimpleNamespace(id="account_id", current_tenant=None) + session = StubSession(scalar_results=[tenant, account]) + + monkeypatch.setattr("core.tools.workflow_as_tool.tool.session_factory.create_session", lambda: session) + tool = _build_tool() + tool.runtime.tenant_id = "tenant_id" + + resolved = tool._resolve_user_from_database(user_id="account_id") + assert resolved is account + assert account.current_tenant is tenant + assert session.expunge_calls == [account] + + +def test_get_workflow_and_get_app_db_branches(monkeypatch: pytest.MonkeyPatch): + """Cover workflow/app retrieval branches and error cases.""" + tool = _build_tool() + latest_workflow = SimpleNamespace(id="wf-latest") + specific_workflow = SimpleNamespace(id="wf-v1") + app = SimpleNamespace(id="app-1") + sessions = iter( + [ + StubSession(scalar_results=[], scalars_results=[latest_workflow]), + StubSession(scalar_results=[specific_workflow], scalars_results=[]), + StubSession(scalar_results=[app], scalars_results=[]), + ] + ) + monkeypatch.setattr( + "core.tools.workflow_as_tool.tool.session_factory.create_session", + lambda: next(sessions), + ) + + assert tool._get_workflow("app-1", "") is latest_workflow + assert tool._get_workflow("app-1", "1") is specific_workflow + assert tool._get_app("app-1") is app + + monkeypatch.setattr( + "core.tools.workflow_as_tool.tool.session_factory.create_session", + lambda: StubSession(scalar_results=[None, None], scalars_results=[None]), + ) + with pytest.raises(ValueError, match="workflow not found"): + tool._get_workflow("app-1", "1") + with pytest.raises(ValueError, match="app not found"): + tool._get_app("app-1") + + +def _setup_transform_args_tool(monkeypatch: pytest.MonkeyPatch) -> WorkflowTool: + """Build a WorkflowTool and stub merged runtime parameters for files/query.""" + tool = _build_tool() + files_param = ToolParameter.get_simple_instance( + name="files", + llm_description="files", + typ=ToolParameter.ToolParameterType.SYSTEM_FILES, + required=False, + ) + files_param.form = ToolParameter.ToolParameterForm.FORM + text_param = ToolParameter.get_simple_instance( + name="query", + llm_description="query", + typ=ToolParameter.ToolParameterType.STRING, + required=False, + ) + text_param.form = ToolParameter.ToolParameterForm.FORM + + monkeypatch.setattr(tool, "get_merged_runtime_parameters", lambda: [files_param, text_param]) + return tool + + +def test_transform_args_valid_files(monkeypatch: pytest.MonkeyPatch): + """Transform args into parameters and files payloads.""" + tool = _setup_transform_args_tool(monkeypatch) + + params, files = tool._transform_args( + { + "query": "hello", + "files": [ + { + "tenant_id": "tenant-1", + "type": "image", + "transfer_method": "tool_file", + "related_id": "tool-1", + "extension": ".png", + }, + { + "tenant_id": "tenant-1", + "type": "document", + "transfer_method": "local_file", + "related_id": "upload-1", + }, + { + "tenant_id": "tenant-1", + "type": "document", + "transfer_method": "remote_url", + "remote_url": "https://example.com/a.pdf", + }, + ], + } + ) + assert params == {"query": "hello"} + assert any(file_item.get("tool_file_id") == "tool-1" for file_item in files) + assert any(file_item.get("upload_file_id") == "upload-1" for file_item in files) + assert any(file_item.get("url") == "https://example.com/a.pdf" for file_item in files) + + +def test_transform_args_invalid_files(monkeypatch: pytest.MonkeyPatch): + """Ignore invalid file entries while keeping params.""" + tool = _setup_transform_args_tool(monkeypatch) + invalid_params, invalid_files = tool._transform_args({"query": "hello", "files": [{"invalid": True}]}) + assert invalid_params == {"query": "hello"} + assert invalid_files == [] + + +def test_extract_files(): + """Extract file outputs into result and file list.""" + tool = _build_tool() + built_files = [ + SimpleNamespace(id="file-1"), + SimpleNamespace(id="file-2"), + ] + with patch("core.tools.workflow_as_tool.tool.build_from_mapping", side_effect=built_files): + outputs = { + "attachments": [ + { + "dify_model_identity": FILE_MODEL_IDENTITY, + "transfer_method": "tool_file", + "related_id": "r1", + } + ], + "single_file": { + "dify_model_identity": FILE_MODEL_IDENTITY, + "transfer_method": "local_file", + "related_id": "r2", + }, + "text": "ok", + } + result, extracted_files = tool._extract_files(outputs) + + assert result["text"] == "ok" + assert len(extracted_files) == 2 + + +def test_update_file_mapping(): + """Map tool/local file transfer methods into output shape.""" + tool = _build_tool() + tool_file = tool._update_file_mapping({"transfer_method": "tool_file", "related_id": "tool-1"}) + assert tool_file["tool_file_id"] == "tool-1" + local_file = tool._update_file_mapping({"transfer_method": "local_file", "related_id": "upload-1"}) + assert local_file["upload_file_id"] == "upload-1" diff --git a/api/tests/unit_tests/core/trigger/__init__.py b/api/tests/unit_tests/core/trigger/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/conftest.py b/api/tests/unit_tests/core/trigger/conftest.py new file mode 100644 index 0000000000..d9da80a8b7 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/conftest.py @@ -0,0 +1,93 @@ +"""Shared factory helpers for core.trigger test suite.""" + +from __future__ import annotations + +from typing import Any + +from core.entities.provider_entities import ProviderConfig +from core.tools.entities.common_entities import I18nObject +from core.trigger.entities.entities import ( + EventEntity, + EventIdentity, + EventParameter, + OAuthSchema, + Subscription, + SubscriptionConstructor, + TriggerProviderEntity, + TriggerProviderIdentity, +) +from core.trigger.provider import PluginTriggerProviderController +from models.provider_ids import TriggerProviderID + +# Valid format for TriggerProviderID: org/plugin/provider +VALID_PROVIDER_ID = "testorg/testplugin/testprovider" + + +def i18n(text: str = "test") -> I18nObject: + return I18nObject(en_US=text, zh_Hans=text) + + +def make_event(name: str = "test_event", parameters: list[EventParameter] | None = None) -> EventEntity: + return EventEntity( + identity=EventIdentity(author="a", name=name, label=i18n(name)), + description=i18n(name), + parameters=parameters or [], + ) + + +def make_provider_entity( + name: str = "test_provider", + events: list[EventEntity] | None = None, + constructor: SubscriptionConstructor | None = None, + subscription_schema: list[ProviderConfig] | None = None, + icon: str | None = "icon.png", + icon_dark: str | None = None, +) -> TriggerProviderEntity: + return TriggerProviderEntity( + identity=TriggerProviderIdentity( + author="a", + name=name, + label=i18n(name), + description=i18n(name), + icon=icon, + icon_dark=icon_dark, + ), + events=events if events is not None else [make_event()], + subscription_constructor=constructor, + subscription_schema=subscription_schema or [], + ) + + +def make_controller( + entity: TriggerProviderEntity | None = None, + tenant_id: str = "tenant-1", + provider_id: str = VALID_PROVIDER_ID, +) -> PluginTriggerProviderController: + return PluginTriggerProviderController( + entity=entity or make_provider_entity(), + plugin_id="plugin-1", + plugin_unique_identifier="uid-1", + provider_id=TriggerProviderID(provider_id), + tenant_id=tenant_id, + ) + + +def make_subscription(**overrides: Any) -> Subscription: + defaults = {"expires_at": 9999999999, "endpoint": "https://hook.test", "properties": {"k": "v"}, "parameters": {}} + defaults.update(overrides) + return Subscription(**defaults) + + +def make_provider_config( + name: str = "api_key", required: bool = True, config_type: str = "secret-input" +) -> ProviderConfig: + return ProviderConfig(name=name, label=i18n(name), type=config_type, required=required) + + +def make_constructor( + credentials_schema: list[ProviderConfig] | None = None, + oauth_schema: OAuthSchema | None = None, +) -> SubscriptionConstructor: + return SubscriptionConstructor( + parameters=[], credentials_schema=credentials_schema or [], oauth_schema=oauth_schema + ) diff --git a/api/tests/unit_tests/core/trigger/debug/__init__.py b/api/tests/unit_tests/core/trigger/debug/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py b/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py new file mode 100644 index 0000000000..d557c20f5e --- /dev/null +++ b/api/tests/unit_tests/core/trigger/debug/test_debug_event_bus.py @@ -0,0 +1,93 @@ +""" +Tests for core.trigger.debug.event_bus.TriggerDebugEventBus. + +Covers: Lua-script dispatch/poll with Redis error resilience. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from redis import RedisError + +from core.trigger.debug.event_bus import TriggerDebugEventBus +from core.trigger.debug.events import PluginTriggerDebugEvent + + +class TestDispatch: + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_dispatch_count(self, mock_redis): + mock_redis.eval.return_value = 3 + event = MagicMock() + event.model_dump_json.return_value = '{"test": true}' + + result = TriggerDebugEventBus.dispatch("tenant-1", event, "pool:key") + + assert result == 3 + mock_redis.eval.assert_called_once() + + @patch("core.trigger.debug.event_bus.redis_client") + def test_redis_error_returns_zero(self, mock_redis): + mock_redis.eval.side_effect = RedisError("connection lost") + event = MagicMock() + event.model_dump_json.return_value = "{}" + + result = TriggerDebugEventBus.dispatch("tenant-1", event, "pool:key") + + assert result == 0 + + +class TestPoll: + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_deserialized_event(self, mock_redis): + event_json = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ).model_dump_json() + mock_redis.eval.return_value = event_json + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is not None + assert result.name == "push" + + @patch("core.trigger.debug.event_bus.redis_client") + def test_returns_none_when_no_event(self, mock_redis): + mock_redis.eval.return_value = None + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is None + + @patch("core.trigger.debug.event_bus.redis_client") + def test_redis_error_returns_none(self, mock_redis): + mock_redis.eval.side_effect = RedisError("timeout") + + result = TriggerDebugEventBus.poll( + event_type=PluginTriggerDebugEvent, + pool_key="pool:key", + tenant_id="t1", + user_id="u1", + app_id="a1", + node_id="n1", + ) + + assert result is None diff --git a/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py new file mode 100644 index 0000000000..bcb1d745e3 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/debug/test_debug_event_selectors.py @@ -0,0 +1,281 @@ +""" +Tests for core.trigger.debug.event_selectors. + +Covers: Plugin/Webhook/Schedule pollers, create_event_poller factory, +and select_trigger_debug_events orchestrator. +""" + +from __future__ import annotations + +from datetime import datetime +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.request import TriggerInvokeEventResponse +from core.trigger.constants import ( + TRIGGER_PLUGIN_NODE_TYPE, + TRIGGER_SCHEDULE_NODE_TYPE, + TRIGGER_WEBHOOK_NODE_TYPE, +) +from core.trigger.debug.event_selectors import ( + PluginTriggerDebugEventPoller, + ScheduleTriggerDebugEventPoller, + WebhookTriggerDebugEventPoller, + create_event_poller, + select_trigger_debug_events, +) +from core.trigger.debug.events import PluginTriggerDebugEvent, WebhookDebugEvent +from dify_graph.enums import BuiltinNodeTypes, NodeType +from tests.unit_tests.core.trigger.conftest import VALID_PROVIDER_ID + + +def _make_poller_args(node_config: dict | None = None) -> dict: + return { + "tenant_id": "t1", + "user_id": "u1", + "app_id": "a1", + "node_config": node_config or {"data": {}}, + "node_id": "n1", + } + + +def _plugin_node_config(provider_id: str = VALID_PROVIDER_ID) -> dict: + """Valid node config for TriggerEventNodeData.model_validate.""" + return { + "data": { + "title": "test", + "plugin_id": "org/testplugin", + "provider_id": provider_id, + "event_name": "push", + "subscription_id": "s1", + "plugin_unique_identifier": "uid-1", + } + } + + +class TestPluginTriggerDebugEventPoller: + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_workflow_args_on_success(self, mock_bus): + event = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.trigger_service.TriggerService") as mock_trigger_svc: + mock_trigger_svc.invoke_trigger_event.return_value = TriggerInvokeEventResponse( + variables={"repo": "dify"}, + cancelled=False, + ) + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"repo": "dify"} + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_no_event(self, mock_bus): + mock_bus.poll.return_value = None + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + + assert poller.poll() is None + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_invoke_cancelled(self, mock_bus): + event = PluginTriggerDebugEvent( + timestamp=100, + name="push", + user_id="u1", + request_id="r1", + subscription_id="s1", + provider_id="p1", + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.trigger_service.TriggerService") as mock_trigger_svc: + mock_trigger_svc.invoke_trigger_event.return_value = TriggerInvokeEventResponse( + variables={}, + cancelled=True, + ) + + poller = PluginTriggerDebugEventPoller(**_make_poller_args(_plugin_node_config())) + + assert poller.poll() is None + + +class TestWebhookTriggerDebugEventPoller: + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_uses_inputs_directly_when_present(self, mock_bus): + event = WebhookDebugEvent( + timestamp=100, + request_id="r1", + node_id="n1", + payload={"inputs": {"key": "val"}, "webhook_data": {}}, + ) + mock_bus.poll.return_value = event + + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"key": "val"} + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_falls_back_to_webhook_data(self, mock_bus): + event = WebhookDebugEvent( + timestamp=100, + request_id="r1", + node_id="n1", + payload={"webhook_data": {"body": "raw"}}, + ) + mock_bus.poll.return_value = event + + with patch("services.trigger.webhook_service.WebhookService") as mock_webhook_svc: + mock_webhook_svc.build_workflow_inputs.return_value = {"parsed": "data"} + + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + assert result.workflow_args["inputs"] == {"parsed": "data"} + mock_webhook_svc.build_workflow_inputs.assert_called_once_with({"body": "raw"}) + + @patch("core.trigger.debug.event_selectors.TriggerDebugEventBus") + def test_returns_none_when_no_event(self, mock_bus): + mock_bus.poll.return_value = None + poller = WebhookTriggerDebugEventPoller(**_make_poller_args()) + + assert poller.poll() is None + + +class TestScheduleTriggerDebugEventPoller: + def _make_schedule_poller(self, mock_redis, mock_schedule_svc, next_run_at: datetime): + """Set up mocks and create a schedule poller.""" + mock_redis.get.return_value = None + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + return ScheduleTriggerDebugEventPoller(**_make_poller_args()) + + @patch("core.trigger.debug.event_selectors.redis_client") + @patch("core.trigger.debug.event_selectors.naive_utc_now") + @patch("core.trigger.debug.event_selectors.calculate_next_run_at") + @patch("core.trigger.debug.event_selectors.ensure_naive_utc") + def test_returns_none_when_not_yet_due(self, mock_ensure, mock_calc, mock_now, mock_redis): + now = datetime(2025, 1, 1, 12, 0, 0) + next_run = datetime(2025, 1, 1, 13, 0, 0) # future + mock_now.return_value = now + mock_calc.return_value = next_run + mock_ensure.return_value = next_run + mock_redis.get.return_value = None + + with patch("services.trigger.schedule_service.ScheduleService") as mock_schedule_svc: + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + + poller = ScheduleTriggerDebugEventPoller(**_make_poller_args()) + + assert poller.poll() is None + + @patch("core.trigger.debug.event_selectors.redis_client") + @patch("core.trigger.debug.event_selectors.naive_utc_now") + @patch("core.trigger.debug.event_selectors.calculate_next_run_at") + @patch("core.trigger.debug.event_selectors.ensure_naive_utc") + def test_fires_event_when_due(self, mock_ensure, mock_calc, mock_now, mock_redis): + now = datetime(2025, 1, 1, 14, 0, 0) + next_run = datetime(2025, 1, 1, 12, 0, 0) # past + mock_now.return_value = now + mock_calc.return_value = next_run + mock_ensure.return_value = next_run + mock_redis.get.return_value = None + + with patch("services.trigger.schedule_service.ScheduleService") as mock_schedule_svc: + mock_schedule_config = MagicMock() + mock_schedule_config.cron_expression = "0 * * * *" + mock_schedule_config.timezone = "UTC" + mock_schedule_svc.to_schedule_config.return_value = mock_schedule_config + + poller = ScheduleTriggerDebugEventPoller(**_make_poller_args()) + result = poller.poll() + + assert result is not None + mock_redis.delete.assert_called_once() + + +class TestCreateEventPoller: + def _workflow_with_node(self, node_type: NodeType): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = node_type + return wf + + def test_creates_plugin_poller(self): + wf = self._workflow_with_node(TRIGGER_PLUGIN_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, PluginTriggerDebugEventPoller) + + def test_creates_webhook_poller(self): + wf = self._workflow_with_node(TRIGGER_WEBHOOK_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, WebhookTriggerDebugEventPoller) + + def test_creates_schedule_poller(self): + wf = self._workflow_with_node(TRIGGER_SCHEDULE_NODE_TYPE) + poller = create_event_poller(wf, "t1", "u1", "a1", "n1") + assert isinstance(poller, ScheduleTriggerDebugEventPoller) + + def test_raises_for_unknown_type(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = BuiltinNodeTypes.START + + with pytest.raises(ValueError): + create_event_poller(wf, "t1", "u1", "a1", "n1") + + def test_raises_when_node_config_missing(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = None + + with pytest.raises(ValueError): + create_event_poller(wf, "t1", "u1", "a1", "n1") + + +class TestSelectTriggerDebugEvents: + def test_returns_first_non_none_event(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE + app_model = MagicMock() + app_model.tenant_id = "t1" + app_model.id = "a1" + + with patch.object(WebhookTriggerDebugEventPoller, "poll") as mock_poll: + expected = MagicMock() + mock_poll.return_value = expected + + result = select_trigger_debug_events(wf, app_model, "u1", ["n1", "n2"]) + + assert result is expected + + def test_returns_none_when_no_events(self): + wf = MagicMock() + wf.get_node_config_by_id.return_value = {"data": {}} + wf.get_node_type_from_node_config.return_value = TRIGGER_WEBHOOK_NODE_TYPE + app_model = MagicMock() + app_model.tenant_id = "t1" + app_model.id = "a1" + + with patch.object(WebhookTriggerDebugEventPoller, "poll", return_value=None): + result = select_trigger_debug_events(wf, app_model, "u1", ["n1"]) + + assert result is None diff --git a/api/tests/unit_tests/core/trigger/test_provider.py b/api/tests/unit_tests/core/trigger/test_provider.py new file mode 100644 index 0000000000..3c2f297e90 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/test_provider.py @@ -0,0 +1,332 @@ +""" +Tests for core.trigger.provider.PluginTriggerProviderController. + +Covers: to_api_entity creation-method logic, credential validation pipeline, +schema resolution by type, event lookup, dispatch/invoke/subscribe delegation. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import CredentialType +from core.trigger.entities.entities import ( + EventParameter, + EventParameterType, + OAuthSchema, + TriggerCreationMethod, +) +from core.trigger.errors import TriggerProviderCredentialValidationError +from tests.unit_tests.core.trigger.conftest import ( + i18n, + make_constructor, + make_controller, + make_event, + make_provider_config, + make_provider_entity, + make_subscription, +) + +ICON_URL = "https://cdn/icon.png" + + +class TestToApiEntity: + @patch("core.trigger.provider.PluginService") + def test_includes_icons_when_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller(entity=make_provider_entity(icon="icon.png", icon_dark="dark.png")) + + api = ctrl.to_api_entity() + + assert api.icon == ICON_URL + assert api.icon_dark == ICON_URL + + @patch("core.trigger.provider.PluginService") + def test_icons_none_when_absent(self, mock_plugin_svc): + ctrl = make_controller(entity=make_provider_entity(icon=None, icon_dark=None)) + + api = ctrl.to_api_entity() + + assert api.icon is None + assert api.icon_dark is None + mock_plugin_svc.get_plugin_icon_url.assert_not_called() + + @patch("core.trigger.provider.PluginService") + def test_manual_only_without_schemas(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + + api = ctrl.to_api_entity() + + assert api.supported_creation_methods == [TriggerCreationMethod.MANUAL] + + @patch("core.trigger.provider.PluginService") + def test_adds_oauth_when_oauth_schema_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + oauth = OAuthSchema(client_schema=[], credentials_schema=[]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + api = ctrl.to_api_entity() + + assert TriggerCreationMethod.OAUTH in api.supported_creation_methods + assert TriggerCreationMethod.MANUAL in api.supported_creation_methods + + @patch("core.trigger.provider.PluginService") + def test_adds_apikey_when_credentials_schema_present(self, mock_plugin_svc): + mock_plugin_svc.get_plugin_icon_url.return_value = ICON_URL + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + + api = ctrl.to_api_entity() + + assert TriggerCreationMethod.APIKEY in api.supported_creation_methods + + +class TestGetEvent: + def test_returns_matching_event(self): + evt = make_event("push") + ctrl = make_controller(entity=make_provider_entity(events=[evt, make_event("pr")])) + + assert ctrl.get_event("push") is evt + + def test_returns_none_for_unknown(self): + ctrl = make_controller(entity=make_provider_entity(events=[make_event("push")])) + + assert ctrl.get_event("nonexistent") is None + + +class TestGetSubscriptionDefaultProperties: + def test_returns_defaults_skipping_none(self): + config1 = make_provider_config("key1") + config1.default = "val1" + config2 = make_provider_config("key2") + config2.default = None + ctrl = make_controller(entity=make_provider_entity(subscription_schema=[config1, config2])) + + props = ctrl.get_subscription_default_properties() + + assert props == {"key1": "val1"} + + +class TestValidateCredentials: + def test_raises_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + + with pytest.raises(ValueError, match="Subscription constructor not found"): + ctrl.validate_credentials("u1", {"key": "val"}) + + def test_raises_for_missing_required_field(self): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + + with pytest.raises(TriggerProviderCredentialValidationError, match="Missing required"): + ctrl.validate_credentials("u1", {}) + + @patch("core.trigger.provider.PluginTriggerClient") + def test_passes_with_valid_credentials(self, mock_client): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + mock_client.return_value.validate_provider_credentials.return_value = True + + ctrl.validate_credentials("u1", {"api_key": "secret123"}) # should not raise + + @patch("core.trigger.provider.PluginTriggerClient") + def test_raises_when_plugin_rejects(self, mock_client): + required_cfg = make_provider_config("api_key", required=True) + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[required_cfg])) + ) + mock_client.return_value.validate_provider_credentials.return_value = None + + with pytest.raises(TriggerProviderCredentialValidationError, match="Invalid credentials"): + ctrl.validate_credentials("u1", {"api_key": "bad"}) + + +class TestGetSupportedCredentialTypes: + def test_empty_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + assert ctrl.get_supported_credential_types() == [] + + def test_oauth_only(self): + oauth = OAuthSchema(client_schema=[], credentials_schema=[]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.OAUTH2 in types + assert CredentialType.API_KEY not in types + + def test_apikey_only(self): + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.API_KEY in types + assert CredentialType.OAUTH2 not in types + + def test_both(self): + oauth = OAuthSchema(client_schema=[], credentials_schema=[make_provider_config("oauth_secret")]) + ctrl = make_controller( + entity=make_provider_entity( + constructor=make_constructor(credentials_schema=[make_provider_config()], oauth_schema=oauth) + ) + ) + + types = ctrl.get_supported_credential_types() + + assert CredentialType.OAUTH2 in types + assert CredentialType.API_KEY in types + + +class TestGetCredentialsSchema: + def test_returns_empty_when_no_constructor(self): + ctrl = make_controller(entity=make_provider_entity(constructor=None)) + assert ctrl.get_credentials_schema(CredentialType.API_KEY) == [] + + def test_returns_apikey_credentials(self): + cfg = make_provider_config("token") + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(credentials_schema=[cfg]))) + + result = ctrl.get_credentials_schema(CredentialType.API_KEY) + + assert len(result) == 1 + assert result[0].name == "token" + + def test_returns_oauth_credentials(self): + oauth_cred = make_provider_config("oauth_token") + oauth = OAuthSchema(client_schema=[], credentials_schema=[oauth_cred]) + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor(oauth_schema=oauth))) + + result = ctrl.get_credentials_schema(CredentialType.OAUTH2) + + assert len(result) == 1 + assert result[0].name == "oauth_token" + + def test_unauthorized_returns_empty(self): + ctrl = make_controller( + entity=make_provider_entity(constructor=make_constructor(credentials_schema=[make_provider_config()])) + ) + assert ctrl.get_credentials_schema(CredentialType.UNAUTHORIZED) == [] + + def test_invalid_type_raises(self): + ctrl = make_controller(entity=make_provider_entity(constructor=make_constructor())) + with pytest.raises(ValueError, match="Invalid credential type"): + ctrl.get_credentials_schema("bogus_type") + + +class TestGetEventParameters: + def test_returns_params_for_known_event(self): + param = EventParameter(name="branch", label=i18n("branch"), type=EventParameterType.STRING) + evt = make_event("push", parameters=[param]) + ctrl = make_controller(entity=make_provider_entity(events=[evt])) + + result = ctrl.get_event_parameters("push") + + assert "branch" in result + assert result["branch"].name == "branch" + + def test_returns_empty_for_unknown_event(self): + ctrl = make_controller(entity=make_provider_entity(events=[make_event("push")])) + + assert ctrl.get_event_parameters("nonexistent") == {} + + +class TestDispatch: + @patch("core.trigger.provider.PluginTriggerClient") + def test_delegates_to_client(self, mock_client): + ctrl = make_controller() + expected = MagicMock() + mock_client.return_value.dispatch_event.return_value = expected + + result = ctrl.dispatch( + request=MagicMock(), + subscription=make_subscription(), + credentials={"k": "v"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + mock_client.return_value.dispatch_event.assert_called_once() + + +class TestInvokeTriggerEvent: + @patch("core.trigger.provider.PluginTriggerClient") + def test_delegates_to_client(self, mock_client): + ctrl = make_controller() + expected = MagicMock() + mock_client.return_value.invoke_trigger_event.return_value = expected + + result = ctrl.invoke_trigger_event( + user_id="u1", + event_name="push", + parameters={}, + credentials={}, + credential_type=CredentialType.API_KEY, + subscription=make_subscription(), + request=MagicMock(), + payload={}, + ) + + assert result is expected + + +class TestSubscribeTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_returns_validated_subscription(self, mock_client): + ctrl = make_controller() + mock_client.return_value.subscribe.return_value.subscription = { + "expires_at": 123, + "endpoint": "https://e", + "properties": {}, + } + + result = ctrl.subscribe_trigger( + user_id="u1", + endpoint="https://e", + parameters={}, + credentials={}, + credential_type=CredentialType.API_KEY, + ) + + assert result.endpoint == "https://e" + + +class TestUnsubscribeTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_returns_validated_result(self, mock_client): + ctrl = make_controller() + mock_client.return_value.unsubscribe.return_value.subscription = {"success": True, "message": "ok"} + + result = ctrl.unsubscribe_trigger( + user_id="u1", + subscription=make_subscription(), + credentials={}, + credential_type=CredentialType.API_KEY, + ) + + assert result.success is True + + +class TestRefreshTrigger: + @patch("core.trigger.provider.PluginTriggerClient") + def test_uses_system_user_id(self, mock_client): + ctrl = make_controller() + mock_client.return_value.refresh.return_value.subscription = { + "expires_at": 456, + "endpoint": "https://e", + "properties": {}, + } + + ctrl.refresh_trigger(subscription=make_subscription(), credentials={}, credential_type=CredentialType.API_KEY) + + call_kwargs = mock_client.return_value.refresh.call_args[1] + assert call_kwargs["user_id"] == "system" diff --git a/api/tests/unit_tests/core/trigger/test_trigger_manager.py b/api/tests/unit_tests/core/trigger/test_trigger_manager.py new file mode 100644 index 0000000000..612be25ec9 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/test_trigger_manager.py @@ -0,0 +1,307 @@ +""" +Tests for core.trigger.trigger_manager.TriggerManager. + +Covers: icon URL construction, provider listing with error resilience, +double-check lock caching, error translation, EventIgnoreError -> cancelled, +and delegation to provider controller. +""" + +from __future__ import annotations + +from threading import Lock +from unittest.mock import MagicMock, patch + +import pytest + +from core.plugin.entities.plugin_daemon import CredentialType +from core.plugin.entities.request import TriggerInvokeEventResponse +from core.plugin.impl.exc import PluginDaemonError, PluginNotFoundError +from core.trigger.errors import EventIgnoreError +from core.trigger.trigger_manager import TriggerManager +from models.provider_ids import TriggerProviderID +from tests.unit_tests.core.trigger.conftest import ( + VALID_PROVIDER_ID, + make_controller, + make_provider_entity, + make_subscription, +) + +PID = TriggerProviderID(VALID_PROVIDER_ID) +PID_STR = str(PID) + + +class TestGetTriggerPluginIcon: + @patch("core.trigger.trigger_manager.dify_config") + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_builds_correct_url(self, mock_client, mock_config): + mock_config.CONSOLE_API_URL = "https://console.example.com" + provider = MagicMock() + provider.declaration.identity.icon = "my-icon.svg" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + url = TriggerManager.get_trigger_plugin_icon("tenant-1", VALID_PROVIDER_ID) + + assert "tenant_id=tenant-1" in url + assert "filename=my-icon.svg" in url + assert url.startswith("https://console.example.com/console/api/workspaces/current/plugin/icon") + + +class TestListPluginTriggerProviders: + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_wraps_entities_into_controllers(self, mock_client): + entity = MagicMock() + entity.declaration = make_provider_entity("p1") + entity.plugin_id = "plugin-1" + entity.plugin_unique_identifier = "uid-1" + entity.provider = VALID_PROVIDER_ID + mock_client.return_value.fetch_trigger_providers.return_value = [entity] + + controllers = TriggerManager.list_plugin_trigger_providers("tenant-1") + + assert len(controllers) == 1 + assert controllers[0].plugin_id == "plugin-1" + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + def test_skips_failing_providers(self, mock_client): + good = MagicMock() + good.declaration = make_provider_entity("good") + good.plugin_id = "good-plugin" + good.plugin_unique_identifier = "uid-good" + good.provider = VALID_PROVIDER_ID + + bad = MagicMock() + bad.declaration = make_provider_entity("bad") + bad.plugin_id = "bad-plugin" + bad.plugin_unique_identifier = "uid-bad" + bad.provider = "bad/format" # 2-part: fails TriggerProviderID validation + + mock_client.return_value.fetch_trigger_providers.return_value = [bad, good] + + controllers = TriggerManager.list_plugin_trigger_providers("tenant-1") + + assert len(controllers) == 1 + assert controllers[0].plugin_id == "good-plugin" + + +class TestGetTriggerProvider: + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_initializes_context_on_first_call(self, mock_ctx, mock_client): + # get() called 3 times: (1) try block, (2) after set, (3) under lock + mock_ctx.plugin_trigger_providers.get.side_effect = [LookupError, {}, {}] + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + provider = MagicMock() + provider.declaration = make_provider_entity() + provider.plugin_id = "p1" + provider.plugin_unique_identifier = "uid-1" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + result = TriggerManager.get_trigger_provider("t1", PID) + + mock_ctx.plugin_trigger_providers.set.assert_called_once_with({}) + mock_ctx.plugin_trigger_providers_lock.set.assert_called_once() + assert result is not None + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_returns_cached_without_fetch(self, mock_ctx, mock_client): + cached = make_controller() + mock_ctx.plugin_trigger_providers.get.return_value = {PID_STR: cached} + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is cached + mock_client.return_value.fetch_trigger_provider.assert_not_called() + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_double_check_lock_uses_cached_from_other_thread(self, mock_ctx, mock_client): + cached = make_controller() + mock_ctx.plugin_trigger_providers.get.side_effect = [ + {}, # first check misses + {PID_STR: cached}, # under-lock check hits + ] + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is cached + mock_client.return_value.fetch_trigger_provider.assert_not_called() + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_fetches_and_caches_on_miss(self, mock_ctx, mock_client): + cache: dict = {} + mock_ctx.plugin_trigger_providers.get.return_value = cache + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + provider = MagicMock() + provider.declaration = make_provider_entity() + provider.plugin_id = "p1" + provider.plugin_unique_identifier = "uid-1" + mock_client.return_value.fetch_trigger_provider.return_value = provider + + result = TriggerManager.get_trigger_provider("t1", PID) + + assert result is not None + assert PID_STR in cache + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_none_fetch_raises_value_error(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.return_value = None + + with pytest.raises(ValueError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/missing")) + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_plugin_not_found_becomes_value_error(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.side_effect = PluginNotFoundError("gone") + + with pytest.raises(ValueError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/miss")) + + @patch("core.trigger.trigger_manager.PluginTriggerClient") + @patch("core.trigger.trigger_manager.contexts") + def test_plugin_daemon_error_propagates(self, mock_ctx, mock_client): + mock_ctx.plugin_trigger_providers.get.return_value = {} + mock_ctx.plugin_trigger_providers_lock.get.return_value = Lock() + mock_client.return_value.fetch_trigger_provider.side_effect = PluginDaemonError("test error") + + with pytest.raises(PluginDaemonError): + TriggerManager.get_trigger_provider("t1", TriggerProviderID("org/plug/miss")) + + +class TestListAllTriggerProviders: + @patch.object(TriggerManager, "list_plugin_trigger_providers") + def test_delegates_to_list_plugin(self, mock_list): + expected = [make_controller()] + mock_list.return_value = expected + + assert TriggerManager.list_all_trigger_providers("t1") is expected + mock_list.assert_called_once_with("t1") + + +class TestListTriggersByProvider: + @patch.object(TriggerManager, "get_trigger_provider") + def test_returns_provider_events(self, mock_get): + ctrl = make_controller() + mock_get.return_value = ctrl + + result = TriggerManager.list_triggers_by_provider("t1", PID) + + assert result == ctrl.get_events() + + +class TestInvokeTriggerEvent: + def _args(self): + return { + "tenant_id": "t1", + "user_id": "u1", + "provider_id": PID, + "event_name": "on_push", + "parameters": {"branch": "main"}, + "credentials": {"token": "abc"}, + "credential_type": CredentialType.API_KEY, + "subscription": make_subscription(), + "request": MagicMock(), + "payload": {"action": "push"}, + } + + @patch.object(TriggerManager, "get_trigger_provider") + def test_returns_invoke_response(self, mock_get): + ctrl = MagicMock() + expected = TriggerInvokeEventResponse(variables={"v": "1"}, cancelled=False) + ctrl.invoke_trigger_event.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.invoke_trigger_event(**self._args()) + + assert result is expected + assert result.cancelled is False + + @patch.object(TriggerManager, "get_trigger_provider") + def test_event_ignore_returns_cancelled(self, mock_get): + ctrl = MagicMock() + ctrl.invoke_trigger_event.side_effect = EventIgnoreError("skip") + mock_get.return_value = ctrl + + result = TriggerManager.invoke_trigger_event(**self._args()) + + assert result.cancelled is True + assert result.variables == {} + + @patch.object(TriggerManager, "get_trigger_provider") + def test_other_errors_propagate(self, mock_get): + ctrl = MagicMock() + ctrl.invoke_trigger_event.side_effect = RuntimeError("boom") + mock_get.return_value = ctrl + + with pytest.raises(RuntimeError, match="boom"): + TriggerManager.invoke_trigger_event(**self._args()) + + +class TestSubscribeTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = make_subscription() + ctrl.subscribe_trigger.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.subscribe_trigger( + tenant_id="t1", + user_id="u1", + provider_id=PID, + endpoint="https://hook.test", + parameters={"f": "all"}, + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + ctrl.subscribe_trigger.assert_called_once() + + +class TestUnsubscribeTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = MagicMock() + ctrl.unsubscribe_trigger.return_value = expected + mock_get.return_value = ctrl + sub = make_subscription() + + result = TriggerManager.unsubscribe_trigger( + tenant_id="t1", + user_id="u1", + provider_id=PID, + subscription=sub, + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected + + +class TestRefreshTrigger: + @patch.object(TriggerManager, "get_trigger_provider") + def test_delegates_with_correct_args(self, mock_get): + ctrl = MagicMock() + expected = make_subscription() + ctrl.refresh_trigger.return_value = expected + mock_get.return_value = ctrl + + result = TriggerManager.refresh_trigger( + tenant_id="t1", + provider_id=PID, + subscription=make_subscription(), + credentials={"token": "x"}, + credential_type=CredentialType.API_KEY, + ) + + assert result is expected diff --git a/api/tests/unit_tests/core/trigger/utils/__init__.py b/api/tests/unit_tests/core/trigger/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py b/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py new file mode 100644 index 0000000000..8804526e2e --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_encryption.py @@ -0,0 +1,62 @@ +"""Tests for core.trigger.utils.encryption — masking logic and cache key generation.""" + +from __future__ import annotations + +from core.entities.provider_entities import ProviderConfig +from core.tools.entities.common_entities import I18nObject +from core.trigger.utils.encryption import ( + TriggerProviderCredentialsCache, + TriggerProviderOAuthClientParamsCache, + TriggerProviderPropertiesCache, + masked_credentials, +) + + +def _make_schema(name: str, field_type: str = "secret-input") -> ProviderConfig: + return ProviderConfig( + name=name, + label=I18nObject(en_US=name, zh_Hans=name), + type=field_type, + ) + + +class TestMaskedCredentials: + def test_short_secret_fully_masked(self): + schema = [_make_schema("key", "secret-input")] + result = masked_credentials(schema, {"key": "ab"}) + assert result["key"] == "**" + + def test_long_secret_partially_masked(self): + schema = [_make_schema("key", "secret-input")] + result = masked_credentials(schema, {"key": "abcdef"}) + assert result["key"].startswith("ab") + assert result["key"].endswith("ef") + assert "**" in result["key"] + + def test_non_secret_field_unchanged(self): + schema = [_make_schema("host", "text-input")] + result = masked_credentials(schema, {"host": "example.com"}) + assert result["host"] == "example.com" + + def test_unknown_key_passes_through(self): + result = masked_credentials([], {"unknown": "value"}) + assert result["unknown"] == "value" + + +class TestCacheKeyGeneration: + def test_credentials_cache_key_contains_ids(self): + cache = TriggerProviderCredentialsCache(tenant_id="t1", provider_id="p1", credential_id="c1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + assert "c1" in cache.cache_key + + def test_oauth_client_cache_key_contains_ids(self): + cache = TriggerProviderOAuthClientParamsCache(tenant_id="t1", provider_id="p1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + + def test_properties_cache_key_contains_ids(self): + cache = TriggerProviderPropertiesCache(tenant_id="t1", provider_id="p1", subscription_id="s1") + assert "t1" in cache.cache_key + assert "p1" in cache.cache_key + assert "s1" in cache.cache_key diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py b/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py new file mode 100644 index 0000000000..e5879aea0a --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_endpoint.py @@ -0,0 +1,31 @@ +"""Tests for core.trigger.utils.endpoint — URL generation.""" + +from __future__ import annotations + +from unittest.mock import patch + +from yarl import URL + +from core.trigger.utils import endpoint + + +class TestGeneratePluginTriggerEndpointUrl: + def test_builds_correct_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_plugin_trigger_endpoint_url("endpoint-123") + + assert url == "https://api.example.com/triggers/plugin/endpoint-123" + + +class TestGenerateWebhookTriggerEndpoint: + def test_non_debug_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_webhook_trigger_endpoint("sub-456", debug=False) + + assert url == "https://api.example.com/triggers/webhook/sub-456" + + def test_debug_url(self): + with patch.object(endpoint, "base_url", URL("https://api.example.com")): + url = endpoint.generate_webhook_trigger_endpoint("sub-456", debug=True) + + assert url == "https://api.example.com/triggers/webhook-debug/sub-456" diff --git a/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py b/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py new file mode 100644 index 0000000000..4fa202b164 --- /dev/null +++ b/api/tests/unit_tests/core/trigger/utils/test_utils_locks.py @@ -0,0 +1,23 @@ +"""Tests for core.trigger.utils.locks — Redis lock key builders.""" + +from __future__ import annotations + +from core.trigger.utils.locks import build_trigger_refresh_lock_key, build_trigger_refresh_lock_keys + + +class TestBuildTriggerRefreshLockKey: + def test_correct_format(self): + key = build_trigger_refresh_lock_key("tenant-1", "sub-1") + + assert key == "trigger_provider_refresh_lock:tenant-1_sub-1" + + +class TestBuildTriggerRefreshLockKeys: + def test_maps_over_pairs(self): + pairs = [("t1", "s1"), ("t2", "s2")] + + keys = build_trigger_refresh_lock_keys(pairs) + + assert len(keys) == 2 + assert keys[0] == "trigger_provider_refresh_lock:t1_s1" + assert keys[1] == "trigger_provider_refresh_lock:t2_s2" diff --git a/api/tests/unit_tests/core/variables/test_segment.py b/api/tests/unit_tests/core/variables/test_segment.py index a9af8bea1d..91259c9a45 100644 --- a/api/tests/unit_tests/core/variables/test_segment.py +++ b/api/tests/unit_tests/core/variables/test_segment.py @@ -1,12 +1,15 @@ import dataclasses +import orjson +import pytest from pydantic import BaseModel from core.helper import encrypter -from core.workflow.file import File, FileTransferMethod, FileType -from core.workflow.runtime import VariablePool -from core.workflow.system_variable import SystemVariable -from core.workflow.variables.segments import ( +from dify_graph.file import File, FileTransferMethod, FileType +from dify_graph.runtime import VariablePool +from dify_graph.system_variable import SystemVariable +from dify_graph.variables.segment_group import SegmentGroup +from dify_graph.variables.segments import ( ArrayAnySegment, ArrayFileSegment, ArrayNumberSegment, @@ -22,8 +25,13 @@ from core.workflow.variables.segments import ( StringSegment, get_segment_discriminator, ) -from core.workflow.variables.types import SegmentType -from core.workflow.variables.variables import ( +from dify_graph.variables.types import SegmentType +from dify_graph.variables.utils import ( + dumps_with_segments, + segment_orjson_default, + to_selector, +) +from dify_graph.variables.variables import ( ArrayAnyVariable, ArrayFileVariable, ArrayNumberVariable, @@ -379,3 +387,125 @@ class TestSegmentDumpAndLoad: assert get_segment_discriminator("not_a_dict") is None assert get_segment_discriminator(42) is None assert get_segment_discriminator(object) is None + + +class TestSegmentAdditionalProperties: + def test_base_segment_text_log_markdown_size_and_to_object(self): + """Ensure StringSegment exposes text, log, markdown, size and to_object.""" + segment = StringSegment(value="hello") + + assert segment.text == "hello" + assert segment.log == "hello" + assert segment.markdown == "hello" + assert segment.size > 0 + assert segment.to_object() == "hello" + + def test_none_segment_empty_outputs(self): + """Ensure NoneSegment renders empty text, log and markdown.""" + segment = NoneSegment() + + assert segment.text == "" + assert segment.log == "" + assert segment.markdown == "" + + def test_object_segment_json_outputs(self): + """Ensure ObjectSegment renders JSON output for text, log and markdown.""" + segment = ObjectSegment(value={"key": "值", "n": 1}) + + assert segment.text == '{"key": "值", "n": 1}' + assert segment.log == '{\n "key": "值",\n "n": 1\n}' + assert segment.markdown == '{\n "key": "值",\n "n": 1\n}' + + def test_array_segment_text_and_markdown(self): + """Ensure ArrayAnySegment handles empty/non-empty text and markdown rendering.""" + empty_segment = ArrayAnySegment(value=[]) + non_empty_segment = ArrayAnySegment(value=[1, "two"]) + + assert empty_segment.text == "" + assert non_empty_segment.text == "[1, 'two']" + assert non_empty_segment.markdown == "- 1\n- two" + + def test_file_segment_properties(self): + """Ensure FileSegment markdown, text and log fields match expected behavior.""" + file = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="doc.txt") + segment = FileSegment(value=file) + + assert segment.markdown == "[doc.txt](https://example.com/file.txt)" + assert segment.log == "" + assert segment.text == "" + + def test_array_string_segment_text_branches(self): + """Ensure ArrayStringSegment text handling for empty and non-empty values.""" + empty_segment = ArrayStringSegment(value=[]) + non_empty_segment = ArrayStringSegment(value=["hello", "世界"]) + + assert empty_segment.text == "" + assert non_empty_segment.text == '["hello", "世界"]' + + def test_array_file_segment_markdown_and_empty_text_log(self): + """Ensure ArrayFileSegment markdown renders links and text/log stay empty.""" + file1 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="a.txt") + file2 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="b.txt") + segment = ArrayFileSegment(value=[file1, file2]) + + assert segment.markdown == "[a.txt](https://example.com/file.txt)\n[b.txt](https://example.com/file.txt)" + assert segment.log == "" + assert segment.text == "" + + +class TestSegmentGroupAdditional: + def test_segment_group_markdown_and_to_object(self): + group = SegmentGroup(value=[StringSegment(value="A"), NoneSegment(), StringSegment(value="B")]) + + assert group.markdown == "AB" + assert group.to_object() == ["A", None, "B"] + + +class TestSegmentUtils: + def test_to_selector_without_paths(self): + assert to_selector("node-1", "output") == ["node-1", "output"] + + def test_to_selector_with_paths(self): + assert to_selector("node-1", "output", ("a", "b")) == ["node-1", "output", "a", "b"] + + def test_array_file_segment_serialization(self): + file1 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="a.txt") + file2 = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="b.txt") + + result = segment_orjson_default(ArrayFileSegment(value=[file1, file2])) + + assert len(result) == 2 + assert result[0]["filename"] == "a.txt" + assert result[1]["filename"] == "b.txt" + + def test_file_segment_serialization(self): + file = create_test_file(transfer_method=FileTransferMethod.REMOTE_URL, filename="single.txt") + + result = segment_orjson_default(FileSegment(value=file)) + + assert result["filename"] == "single.txt" + assert result["remote_url"] == "https://example.com/file.txt" + + def test_segment_group_and_segment_serialization(self): + group = SegmentGroup(value=[StringSegment(value="a"), StringSegment(value="b")]) + + assert segment_orjson_default(group) == ["a", "b"] + assert segment_orjson_default(StringSegment(value="value")) == "value" + + def test_segment_orjson_default_unsupported_type(self): + with pytest.raises(TypeError, match="not JSON serializable"): + segment_orjson_default(object()) + + def test_dumps_with_segments(self): + data = { + "segment": StringSegment(value="hello"), + "group": SegmentGroup(value=[StringSegment(value="x"), StringSegment(value="y")]), + 1: "numeric-key", + } + + dumped = dumps_with_segments(data) + loaded = orjson.loads(dumped) + + assert loaded["segment"] == "hello" + assert loaded["group"] == ["x", "y"] + assert loaded["1"] == "numeric-key" diff --git a/api/tests/unit_tests/core/variables/test_segment_type.py b/api/tests/unit_tests/core/variables/test_segment_type.py index e28fed187b..bb234d9bbd 100644 --- a/api/tests/unit_tests/core/variables/test_segment_type.py +++ b/api/tests/unit_tests/core/variables/test_segment_type.py @@ -1,6 +1,8 @@ import pytest -from core.workflow.variables.types import ArrayValidation, SegmentType +from dify_graph.variables.segment_group import SegmentGroup +from dify_graph.variables.segments import StringSegment +from dify_graph.variables.types import ArrayValidation, SegmentType class TestSegmentTypeIsArrayType: @@ -69,22 +71,36 @@ class TestSegmentTypeIsValidArrayValidation: """ def test_array_validation_all_success(self): + # Arrange value = ["hello", "world", "foo"] - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Assert + assert is_valid def test_array_validation_all_fail(self): + # Arrange value = ["hello", 123, "world"] - # Should return False, since 123 is not a string - assert not SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.ALL) + # Assert + assert not is_valid def test_array_validation_first(self): + # Arrange value = ["hello", 123, None] - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.FIRST) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.FIRST) + # Assert + assert is_valid def test_array_validation_none(self): + # Arrange value = [1, 2, 3] - # validation is None, skip - assert SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.NONE) + # Act + is_valid = SegmentType.ARRAY_STRING.is_valid(value, array_validation=ArrayValidation.NONE) + # Assert + assert is_valid class TestSegmentTypeGetZeroValue: @@ -163,3 +179,62 @@ class TestSegmentTypeGetZeroValue: for seg_type in unsupported_types: with pytest.raises(ValueError, match="unsupported variable type"): SegmentType.get_zero_value(seg_type) + + +class TestSegmentTypeInferSegmentType: + @pytest.mark.parametrize( + ("value", "expected"), + [ + ([], SegmentType.ARRAY_NUMBER), + ([1, 2, 3], SegmentType.ARRAY_NUMBER), + ([1, 2.5], SegmentType.ARRAY_NUMBER), + (["a", "b"], SegmentType.ARRAY_STRING), + ([{"k": "v"}], SegmentType.ARRAY_OBJECT), + ([None], SegmentType.ARRAY_ANY), + ([True, False], SegmentType.ARRAY_BOOLEAN), + ([[1], [2]], SegmentType.ARRAY_ANY), + ([1, "a"], SegmentType.ARRAY_ANY), + (None, SegmentType.NONE), + (True, SegmentType.BOOLEAN), + (1, SegmentType.INTEGER), + (1.2, SegmentType.FLOAT), + ("abc", SegmentType.STRING), + ({"k": "v"}, SegmentType.OBJECT), + ], + ) + def test_infer_segment_type_supported_values(self, value, expected): + assert SegmentType.infer_segment_type(value) == expected + + +class TestSegmentTypeAdditionalMethods: + def test_cast_value_for_bool_number_and_array_number(self): + assert SegmentType.cast_value(True, SegmentType.INTEGER) == 1 + assert SegmentType.cast_value(False, SegmentType.NUMBER) == 0 + assert SegmentType.cast_value([True, False], SegmentType.ARRAY_NUMBER) == [1, 0] + + mixed = [True, 1] + assert SegmentType.cast_value(mixed, SegmentType.ARRAY_NUMBER) is mixed + assert SegmentType.cast_value("x", SegmentType.STRING) == "x" + + def test_exposed_type_and_element_type(self): + assert SegmentType.INTEGER.exposed_type() == SegmentType.NUMBER + assert SegmentType.FLOAT.exposed_type() == SegmentType.NUMBER + assert SegmentType.STRING.exposed_type() == SegmentType.STRING + + assert SegmentType.ARRAY_STRING.element_type() == SegmentType.STRING + assert SegmentType.ARRAY_ANY.element_type() is None + + with pytest.raises(ValueError, match="element_type is only supported by array type"): + SegmentType.STRING.element_type() + + def test_group_validation_for_segment_group_and_list(self): + valid_group = SegmentGroup(value=[StringSegment(value="a")]) + assert SegmentType.GROUP.is_valid(valid_group) is True + assert SegmentType.GROUP.is_valid([StringSegment(value="b")]) is True + assert SegmentType.GROUP.is_valid(["not-segment"]) is False + + def test_unreachable_assertion_branch(self, monkeypatch): + monkeypatch.setattr(SegmentType, "is_array_type", lambda self: False) + + with pytest.raises(AssertionError, match="unreachable"): + SegmentType.ARRAY_STRING.is_valid(["a"]) diff --git a/api/tests/unit_tests/core/variables/test_segment_type_validation.py b/api/tests/unit_tests/core/variables/test_segment_type_validation.py index 52e5dd180c..41ce483447 100644 --- a/api/tests/unit_tests/core/variables/test_segment_type_validation.py +++ b/api/tests/unit_tests/core/variables/test_segment_type_validation.py @@ -10,10 +10,10 @@ from typing import Any import pytest -from core.workflow.file.enums import FileTransferMethod, FileType -from core.workflow.file.models import File -from core.workflow.variables.segment_group import SegmentGroup -from core.workflow.variables.segments import ( +from dify_graph.file.enums import FileTransferMethod, FileType +from dify_graph.file.models import File +from dify_graph.variables.segment_group import SegmentGroup +from dify_graph.variables.segments import ( ArrayFileSegment, BooleanSegment, FileSegment, @@ -22,7 +22,7 @@ from core.workflow.variables.segments import ( ObjectSegment, StringSegment, ) -from core.workflow.variables.types import ArrayValidation, SegmentType +from dify_graph.variables.types import ArrayValidation, SegmentType def create_test_file( diff --git a/api/tests/unit_tests/core/variables/test_variables.py b/api/tests/unit_tests/core/variables/test_variables.py index 6fc162e533..dd0fe2e65a 100644 --- a/api/tests/unit_tests/core/variables/test_variables.py +++ b/api/tests/unit_tests/core/variables/test_variables.py @@ -1,7 +1,7 @@ import pytest from pydantic import ValidationError -from core.workflow.variables import ( +from dify_graph.variables import ( ArrayFileVariable, ArrayVariable, FloatVariable, @@ -11,7 +11,7 @@ from core.workflow.variables import ( SegmentType, StringVariable, ) -from core.workflow.variables.variables import VariableBase +from dify_graph.variables.variables import VariableBase def test_frozen_variables(): diff --git a/api/tests/unit_tests/core/workflow/context/test_execution_context.py b/api/tests/unit_tests/core/workflow/context/test_execution_context.py index 8dd669e17f..d09b8397c3 100644 --- a/api/tests/unit_tests/core/workflow/context/test_execution_context.py +++ b/api/tests/unit_tests/core/workflow/context/test_execution_context.py @@ -9,7 +9,7 @@ from unittest.mock import MagicMock import pytest from pydantic import BaseModel -from core.workflow.context.execution_context import ( +from dify_graph.context.execution_context import ( AppContext, ExecutionContext, ExecutionContextBuilder, @@ -286,7 +286,7 @@ class TestCaptureCurrentContext: def test_capture_current_context_returns_context(self): """Test that capture_current_context returns a valid context.""" - from core.workflow.context.execution_context import capture_current_context + from dify_graph.context.execution_context import capture_current_context result = capture_current_context() @@ -303,7 +303,7 @@ class TestCaptureCurrentContext: test_var = contextvars.ContextVar("capture_test_var") test_var.set("test_value_123") - from core.workflow.context.execution_context import capture_current_context + from dify_graph.context.execution_context import capture_current_context result = capture_current_context() @@ -313,12 +313,12 @@ class TestCaptureCurrentContext: class TestTenantScopedContextRegistry: def setup_method(self): - from core.workflow.context import reset_context_provider + from dify_graph.context import reset_context_provider reset_context_provider() def teardown_method(self): - from core.workflow.context import reset_context_provider + from dify_graph.context import reset_context_provider reset_context_provider() @@ -333,7 +333,7 @@ class TestTenantScopedContextRegistry: assert read_context("workflow.sandbox", tenant_id="t2").base_url == "http://t2" def test_missing_provider_raises_keyerror(self): - from core.workflow.context import ContextProviderNotFoundError + from dify_graph.context import ContextProviderNotFoundError with pytest.raises(ContextProviderNotFoundError): read_context("missing", tenant_id="unknown") diff --git a/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py b/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py index 8d49394653..22792eb5b3 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py +++ b/api/tests/unit_tests/core/workflow/entities/test_graph_runtime_state.py @@ -4,8 +4,10 @@ from unittest.mock import MagicMock, patch import pytest -from core.model_runtime.entities.llm_entities import LLMUsage -from core.workflow.runtime import GraphRuntimeState, ReadOnlyGraphRuntimeStateWrapper, VariablePool +from dify_graph.constants import CONVERSATION_VARIABLE_NODE_ID +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.runtime import GraphRuntimeState, ReadOnlyGraphRuntimeStateWrapper, VariablePool +from dify_graph.variables.variables import StringVariable class StubCoordinator: @@ -115,7 +117,7 @@ class TestGraphRuntimeState: queue = state.ready_queue - from core.workflow.graph_engine.ready_queue import InMemoryReadyQueue + from dify_graph.graph_engine.ready_queue import InMemoryReadyQueue assert isinstance(queue, InMemoryReadyQueue) @@ -124,7 +126,7 @@ class TestGraphRuntimeState: execution = state.graph_execution - from core.workflow.graph_engine.domain.graph_execution import GraphExecution + from dify_graph.graph_engine.domain.graph_execution import GraphExecution assert isinstance(execution, GraphExecution) assert execution.workflow_id == "" @@ -139,7 +141,7 @@ class TestGraphRuntimeState: mock_graph = MagicMock() with patch( - "core.workflow.graph_engine.response_coordinator.ResponseStreamCoordinator", autospec=True + "dify_graph.graph_engine.response_coordinator.ResponseStreamCoordinator", autospec=True ) as coordinator_cls: coordinator_instance = coordinator_cls.return_value state.configure(graph=mock_graph) @@ -278,3 +280,17 @@ class TestGraphRuntimeState: assert restored_execution.started is True assert new_stub.state == "configured" + + def test_snapshot_restore_preserves_updated_conversation_variable(self): + variable_pool = VariablePool( + conversation_variables=[StringVariable(name="session_name", value="before")], + ) + variable_pool.add((CONVERSATION_VARIABLE_NODE_ID, "session_name"), "after") + + state = GraphRuntimeState(variable_pool=variable_pool, start_at=time()) + snapshot = state.dumps() + restored = GraphRuntimeState.from_snapshot(snapshot) + + restored_value = restored.variable_pool.get((CONVERSATION_VARIABLE_NODE_ID, "session_name")) + assert restored_value is not None + assert restored_value.value == "after" diff --git a/api/tests/unit_tests/core/workflow/entities/test_pause_reason.py b/api/tests/unit_tests/core/workflow/entities/test_pause_reason.py index 6144df06e0..158f7018b5 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_pause_reason.py +++ b/api/tests/unit_tests/core/workflow/entities/test_pause_reason.py @@ -5,7 +5,7 @@ Tests for PauseReason discriminated union serialization/deserialization. import pytest from pydantic import BaseModel, ValidationError -from core.workflow.entities.pause_reason import ( +from dify_graph.entities.pause_reason import ( HumanInputRequired, PauseReason, SchedulingPause, diff --git a/api/tests/unit_tests/core/workflow/entities/test_template.py b/api/tests/unit_tests/core/workflow/entities/test_template.py index f3197ea282..2d4c7f7b77 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_template.py +++ b/api/tests/unit_tests/core/workflow/entities/test_template.py @@ -1,6 +1,6 @@ """Tests for template module.""" -from core.workflow.nodes.base.template import Template, TextSegment, VariableSegment +from dify_graph.nodes.base.template import Template, TextSegment, VariableSegment class TestTemplate: diff --git a/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py b/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py index d4254df319..6100ebede5 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py +++ b/api/tests/unit_tests/core/workflow/entities/test_variable_pool.py @@ -1,5 +1,5 @@ -from core.workflow.runtime import VariablePool -from core.workflow.variables.segments import ( +from dify_graph.runtime import VariablePool +from dify_graph.variables.segments import ( BooleanSegment, IntegerSegment, NoneSegment, diff --git a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py index a4b1189a1c..216e64db8d 100644 --- a/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py +++ b/api/tests/unit_tests/core/workflow/entities/test_workflow_node_execution.py @@ -8,8 +8,8 @@ from typing import Any import pytest -from core.workflow.entities.workflow_node_execution import WorkflowNodeExecution -from core.workflow.enums import NodeType +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecution +from dify_graph.enums import BuiltinNodeTypes class TestWorkflowNodeExecutionProcessDataTruncation: @@ -25,7 +25,7 @@ class TestWorkflowNodeExecutionProcessDataTruncation: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=process_data, created_at=datetime.now(), @@ -212,7 +212,7 @@ class TestWorkflowNodeExecutionProcessDataScenarios: workflow_id="test-workflow-id", index=1, node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, title="Test Node", process_data=scenario.original_data, created_at=datetime.now(), diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph.py b/api/tests/unit_tests/core/workflow/graph/test_graph.py index 01b514ed7c..24bd9ccbed 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph.py @@ -2,10 +2,10 @@ from unittest.mock import Mock -from core.workflow.enums import NodeExecutionType, NodeState, NodeType -from core.workflow.graph.edge import Edge -from core.workflow.graph.graph import Graph -from core.workflow.nodes.base.node import Node +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState +from dify_graph.graph.edge import Edge +from dify_graph.graph.graph import Graph +from dify_graph.nodes.base.node import Node def create_mock_node(node_id: str, execution_type: NodeExecutionType, state: NodeState = NodeState.UNKNOWN) -> Node: @@ -14,7 +14,7 @@ def create_mock_node(node_id: str, execution_type: NodeExecutionType, state: Nod node.id = node_id node.execution_type = execution_type node.state = state - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START return node diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py index 15d1dcb48d..64c2eee776 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_builder.py @@ -2,12 +2,12 @@ from unittest.mock import MagicMock import pytest -from core.workflow.enums import NodeType -from core.workflow.graph import Graph -from core.workflow.nodes.base.node import Node +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.graph import Graph +from dify_graph.nodes.base.node import Node -def _make_node(node_id: str, node_type: NodeType = NodeType.START) -> Node: +def _make_node(node_id: str, node_type: NodeType = BuiltinNodeTypes.START) -> Node: node = MagicMock(spec=Node) node.id = node_id node.node_type = node_type @@ -17,9 +17,9 @@ def _make_node(node_id: str, node_type: NodeType = NodeType.START) -> Node: def test_graph_builder_creates_linear_graph(): builder = Graph.new() - root = _make_node("root", NodeType.START) - mid = _make_node("mid", NodeType.LLM) - end = _make_node("end", NodeType.END) + root = _make_node("root", BuiltinNodeTypes.START) + mid = _make_node("mid", BuiltinNodeTypes.LLM) + end = _make_node("end", BuiltinNodeTypes.END) graph = builder.add_root(root).add_node(mid).add_node(end).build() diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py index 6858120335..75de07bd8b 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_skip_validation.py @@ -4,15 +4,13 @@ from typing import Any import pytest -from core.app.entities.app_invoke_entities import InvokeFrom -from core.app.workflow.node_factory import DifyNodeFactory -from core.workflow.entities import GraphInitParams -from core.workflow.graph import Graph -from core.workflow.graph.validation import GraphValidationError -from core.workflow.nodes import NodeType -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from models.enums import UserFrom +from core.workflow.node_factory import DifyNodeFactory +from dify_graph.graph import Graph +from dify_graph.graph.validation import GraphValidationError +from dify_graph.nodes import BuiltinNodeTypes +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from tests.workflow_test_utils import build_test_graph_init_params def _build_iteration_graph(node_id: str) -> dict[str, Any]: @@ -53,14 +51,14 @@ def _build_loop_graph(node_id: str) -> dict[str, Any]: def _make_factory(graph_config: dict[str, Any]) -> DifyNodeFactory: - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.DEBUGGER, + user_from="account", + invoke_from="debugger", call_depth=0, ) graph_runtime_state = GraphRuntimeState( @@ -94,7 +92,7 @@ def test_iteration_root_requires_skip_validation(): ) assert graph.root_node.id == node_id - assert graph.root_node.node_type == NodeType.ITERATION + assert graph.root_node.node_type == BuiltinNodeTypes.ITERATION def test_loop_root_requires_skip_validation(): @@ -117,4 +115,4 @@ def test_loop_root_requires_skip_validation(): ) assert graph.root_node.id == node_id - assert graph.root_node.node_type == NodeType.LOOP + assert graph.root_node.node_type == BuiltinNodeTypes.LOOP diff --git a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py index 5716aae4c7..e94ad74eb0 100644 --- a/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py +++ b/api/tests/unit_tests/core/workflow/graph/test_graph_validation.py @@ -6,25 +6,24 @@ from dataclasses import dataclass import pytest -from core.app.entities.app_invoke_entities import InvokeFrom -from core.workflow.entities import GraphInitParams -from core.workflow.enums import ErrorStrategy, NodeExecutionType, NodeType -from core.workflow.graph import Graph -from core.workflow.graph.validation import GraphValidationError -from core.workflow.nodes.base.entities import BaseNodeData -from core.workflow.nodes.base.node import Node -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from models.enums import UserFrom +from dify_graph.entities import GraphInitParams +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, ErrorStrategy, NodeExecutionType, NodeType +from dify_graph.graph import Graph +from dify_graph.graph.validation import GraphValidationError +from dify_graph.nodes.base.node import Node +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from tests.workflow_test_utils import build_test_graph_init_params class _TestNodeData(BaseNodeData): - type: NodeType | str | None = None + type: NodeType | None = None execution_type: NodeExecutionType | str | None = None class _TestNode(Node[_TestNodeData]): - node_type = NodeType.ANSWER + node_type = BuiltinNodeTypes.ANSWER execution_type = NodeExecutionType.EXECUTABLE @classmethod @@ -47,13 +46,8 @@ class _TestNode(Node[_TestNodeData]): ) node_type_value = self.data.get("type") - if isinstance(node_type_value, NodeType): + if isinstance(node_type_value, str): self.node_type = node_type_value - elif isinstance(node_type_value, str): - try: - self.node_type = NodeType(node_type_value) - except ValueError: - pass def _run(self): raise NotImplementedError @@ -92,14 +86,14 @@ class _SimpleNodeFactory: @pytest.fixture def graph_init_dependencies() -> tuple[_SimpleNodeFactory, dict[str, object]]: graph_config: dict[str, object] = {"edges": [], "nodes": []} - init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + user_from="account", + invoke_from="service-api", call_depth=0, ) variable_pool = VariablePool(system_variables=SystemVariable(user_id="user", files=[]), user_inputs={}) @@ -113,14 +107,17 @@ def test_graph_initialization_runs_default_validators( ): node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, - {"id": "answer", "data": {"type": NodeType.ANSWER, "title": "Answer"}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, + {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}}, ] graph_config["edges"] = [ {"source": "start", "target": "answer", "sourceHandle": "success"}, ] - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph.root_node.id == "start" assert "answer" in graph.nodes @@ -131,14 +128,17 @@ def test_graph_validation_fails_for_unknown_edge_targets( ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, ] graph_config["edges"] = [ {"source": "start", "target": "missing", "sourceHandle": "success"}, ] with pytest.raises(GraphValidationError) as exc: - Graph.init(graph_config=graph_config, node_factory=node_factory) + Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert any(issue.code == "MISSING_NODE" for issue in exc.value.issues) @@ -148,11 +148,14 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type( ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, { "id": "branch", "data": { - "type": NodeType.IF_ELSE, + "type": BuiltinNodeTypes.IF_ELSE, "title": "Branch", "error_strategy": ErrorStrategy.FAIL_BRANCH, }, @@ -162,25 +165,55 @@ def test_graph_promotes_fail_branch_nodes_to_branch_execution_type( {"source": "start", "target": "branch", "sourceHandle": "success"}, ] - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") assert graph.nodes["branch"].execution_type == NodeExecutionType.BRANCH -def test_graph_validation_blocks_start_and_trigger_coexistence( +def test_graph_init_ignores_custom_note_nodes_before_node_data_validation( graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]], ) -> None: node_factory, graph_config = graph_init_dependencies graph_config["nodes"] = [ - {"id": "start", "data": {"type": NodeType.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}}, { - "id": "trigger", - "data": {"type": NodeType.TRIGGER_WEBHOOK, "title": "Webhook", "execution_type": NodeExecutionType.ROOT}, + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, + }, + {"id": "answer", "data": {"type": BuiltinNodeTypes.ANSWER, "title": "Answer"}}, + { + "id": "note", + "type": "custom-note", + "data": { + "type": "", + "title": "", + "desc": "", + "text": "{}", + "theme": "blue", + }, + }, + ] + graph_config["edges"] = [ + {"source": "start", "target": "answer", "sourceHandle": "success"}, + ] + + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") + + assert graph.root_node.id == "start" + assert "answer" in graph.nodes + assert "note" not in graph.nodes + + +def test_graph_init_fails_for_unknown_root_node_id( + graph_init_dependencies: tuple[_SimpleNodeFactory, dict[str, object]], +) -> None: + node_factory, graph_config = graph_init_dependencies + graph_config["nodes"] = [ + { + "id": "start", + "data": {"type": BuiltinNodeTypes.START, "title": "Start", "execution_type": NodeExecutionType.ROOT}, }, ] graph_config["edges"] = [] - with pytest.raises(GraphValidationError) as exc_info: - Graph.init(graph_config=graph_config, node_factory=node_factory) - - assert any(issue.code == "TRIGGER_START_NODE_CONFLICT" for issue in exc_info.value.issues) + with pytest.raises(ValueError, match="Root node id missing not found in the graph"): + Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="missing") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/README.md b/api/tests/unit_tests/core/workflow/graph_engine/README.md index 3fff4cf6a9..40ed61eb02 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/README.md +++ b/api/tests/unit_tests/core/workflow/graph_engine/README.md @@ -68,7 +68,7 @@ print(f"Success rate: {suite_result.success_rate:.1f}%") #### Event Sequence Validation ```python -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, NodeRunStartedEvent, NodeRunSucceededEvent, @@ -376,39 +376,39 @@ See `test_mock_example.py` for comprehensive examples including: ```bash # Run graph engine tests (includes property-based tests) -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/test_graph_engine.py # Run with specific test patterns -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py -k "test_echo" +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/test_graph_engine.py -k "test_echo" # Run with verbose output -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py -v +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/test_graph_engine.py -v ``` ### Mock System Tests ```bash # Run auto-mock system tests -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/test_auto_mock_system.py # Run examples -uv run python api/tests/unit_tests/core/workflow/graph_engine/test_mock_example.py +uv run python api/tests/unit_tests/dify_graph/graph_engine/test_mock_example.py # Run simple validation -uv run python api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py +uv run python api/tests/unit_tests/dify_graph/graph_engine/test_mock_simple.py ``` ### All Tests ```bash # Run all graph engine tests -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/ +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/ # Run with coverage -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/ --cov=core.workflow.graph_engine +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/ --cov=dify_graph.graph_engine # Run in parallel -uv run pytest api/tests/unit_tests/core/workflow/graph_engine/ -n auto +uv run pytest api/tests/unit_tests/dify_graph/graph_engine/ -n auto ``` ## Troubleshooting diff --git a/api/tests/unit_tests/core/workflow/graph_engine/command_channels/test_redis_channel.py b/api/tests/unit_tests/core/workflow/graph_engine/command_channels/test_redis_channel.py index db9b977e4a..4dec618e49 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/command_channels/test_redis_channel.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/command_channels/test_redis_channel.py @@ -3,15 +3,15 @@ import json from unittest.mock import MagicMock -from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel -from core.workflow.graph_engine.entities.commands import ( +from dify_graph.graph_engine.command_channels.redis_channel import RedisChannel +from dify_graph.graph_engine.entities.commands import ( AbortCommand, CommandType, GraphEngineCommand, UpdateVariablesCommand, VariableUpdate, ) -from core.workflow.variables import IntegerVariable, StringVariable +from dify_graph.variables import IntegerVariable, StringVariable class TestRedisChannel: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py index 5d17b7a243..6f821ba799 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_handlers.py @@ -2,18 +2,18 @@ from __future__ import annotations -from core.workflow.enums import NodeExecutionType, NodeState, NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph import Graph -from core.workflow.graph_engine.domain.graph_execution import GraphExecution -from core.workflow.graph_engine.event_management.event_handlers import EventHandler -from core.workflow.graph_engine.event_management.event_manager import EventManager -from core.workflow.graph_engine.graph_state_manager import GraphStateManager -from core.workflow.graph_engine.ready_queue.in_memory import InMemoryReadyQueue -from core.workflow.graph_engine.response_coordinator.coordinator import ResponseStreamCoordinator -from core.workflow.graph_events import NodeRunRetryEvent, NodeRunStartedEvent -from core.workflow.node_events import NodeRunResult -from core.workflow.nodes.base.entities import RetryConfig -from core.workflow.runtime import GraphRuntimeState, VariablePool +from dify_graph.entities.base_node_data import RetryConfig +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, WorkflowNodeExecutionStatus +from dify_graph.graph import Graph +from dify_graph.graph_engine.domain.graph_execution import GraphExecution +from dify_graph.graph_engine.event_management.event_handlers import EventHandler +from dify_graph.graph_engine.event_management.event_manager import EventManager +from dify_graph.graph_engine.graph_state_manager import GraphStateManager +from dify_graph.graph_engine.ready_queue.in_memory import InMemoryReadyQueue +from dify_graph.graph_engine.response_coordinator.coordinator import ResponseStreamCoordinator +from dify_graph.graph_events import NodeRunRetryEvent, NodeRunStartedEvent +from dify_graph.node_events import NodeRunResult +from dify_graph.runtime import GraphRuntimeState, VariablePool from libs.datetime_utils import naive_utc_now @@ -73,7 +73,7 @@ def test_retry_does_not_emit_additional_start_event() -> None: handler, event_manager, graph_execution = _build_event_handler(node_id) execution_id = "exec-1" - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE start_time = naive_utc_now() start_event = NodeRunStartedEvent( diff --git a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_manager.py b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_manager.py index 15eac6b537..25494dc647 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_manager.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/event_management/test_event_manager.py @@ -4,9 +4,9 @@ from __future__ import annotations import logging -from core.workflow.graph_engine.event_management.event_manager import EventManager -from core.workflow.graph_engine.layers.base import GraphEngineLayer -from core.workflow.graph_events import GraphEngineEvent +from dify_graph.graph_engine.event_management.event_manager import EventManager +from dify_graph.graph_engine.layers.base import GraphEngineLayer +from dify_graph.graph_events import GraphEngineEvent class _FaultyLayer(GraphEngineLayer): diff --git a/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py index 0019020ede..73d59ea4e9 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/graph_traversal/test_skip_propagator.py @@ -2,9 +2,9 @@ from unittest.mock import MagicMock, create_autospec -from core.workflow.graph import Edge, Graph -from core.workflow.graph_engine.graph_state_manager import GraphStateManager -from core.workflow.graph_engine.graph_traversal.skip_propagator import SkipPropagator +from dify_graph.graph import Edge, Graph +from dify_graph.graph_engine.graph_state_manager import GraphStateManager +from dify_graph.graph_engine.graph_traversal.skip_propagator import SkipPropagator class TestSkipPropagator: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/human_input_test_utils.py b/api/tests/unit_tests/core/workflow/graph_engine/human_input_test_utils.py index 2ef23c7f0f..fc8133f5e1 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/human_input_test_utils.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/human_input_test_utils.py @@ -7,8 +7,8 @@ from dataclasses import dataclass from datetime import datetime, timedelta from typing import Any -from core.workflow.nodes.human_input.enums import HumanInputFormStatus -from core.workflow.repositories.human_input_form_repository import ( +from dify_graph.nodes.human_input.enums import HumanInputFormStatus +from dify_graph.repositories.human_input_form_repository import ( FormCreateParams, HumanInputFormEntity, HumanInputFormRecipientEntity, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py index 903800ce88..9e7b3654b7 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/conftest.py @@ -10,7 +10,7 @@ from opentelemetry.sdk.trace.export import SimpleSpanProcessor from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter from opentelemetry.trace import set_tracer_provider -from core.workflow.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes @pytest.fixture @@ -44,7 +44,7 @@ def mock_start_node(): node.id = "test-start-node-id" node.title = "Start Node" node.execution_id = "test-start-execution-id" - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START return node @@ -55,7 +55,7 @@ def mock_llm_node(): node.id = "test-llm-node-id" node.title = "LLM Node" node.execution_id = "test-llm-execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM return node @@ -63,13 +63,13 @@ def mock_llm_node(): def mock_tool_node(): """Create a mock Tool Node with tool-specific attributes.""" from core.tools.entities.tool_entities import ToolProviderType - from core.workflow.nodes.tool.entities import ToolNodeData + from dify_graph.nodes.tool.entities import ToolNodeData node = MagicMock() node.id = "test-tool-node-id" node.title = "Test Tool Node" node.execution_id = "test-tool-execution-id" - node.node_type = NodeType.TOOL + node.node_type = BuiltinNodeTypes.TOOL tool_data = ToolNodeData( title="Test Tool Node", @@ -108,7 +108,7 @@ def mock_retrieval_node(): node.id = "test-retrieval-node-id" node.title = "Retrieval Node" node.execution_id = "test-retrieval-execution-id" - node.node_type = NodeType.KNOWLEDGE_RETRIEVAL + node.node_type = BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL return node @@ -117,8 +117,8 @@ def mock_result_event(): """Create a mock result event with NodeRunResult.""" from datetime import datetime - from core.workflow.graph_events.node import NodeRunSucceededEvent - from core.workflow.node_events.base import NodeRunResult + from dify_graph.graph_events.node import NodeRunSucceededEvent + from dify_graph.node_events.base import NodeRunResult node_run_result = NodeRunResult( inputs={"query": "test query"}, @@ -130,7 +130,7 @@ def mock_result_event(): return NodeRunSucceededEvent( id="test-execution-id", node_id="test-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.now(), node_run_result=node_run_result, ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_layer_initialization.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_layer_initialization.py index f1086c9936..db32527849 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_layer_initialization.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_layer_initialization.py @@ -2,13 +2,13 @@ from __future__ import annotations import pytest -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_engine.layers.base import ( +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_engine.layers.base import ( GraphEngineLayer, GraphEngineLayerNotInitializedError, ) -from core.workflow.graph_events import GraphEngineEvent +from dify_graph.graph_events import GraphEngineEvent from ..test_table_runner import WorkflowRunner diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py index 9a491d24e1..2a36f712fd 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_llm_quota.py @@ -4,18 +4,18 @@ from unittest.mock import MagicMock, patch from core.app.workflow.layers.llm_quota import LLMQuotaLayer from core.errors.error import QuotaExceededError -from core.model_runtime.entities.llm_entities import LLMUsage -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph_engine.entities.commands import CommandType -from core.workflow.graph_events.node import NodeRunSucceededEvent -from core.workflow.node_events import NodeRunResult +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.graph_engine.entities.commands import CommandType +from dify_graph.graph_events.node import NodeRunSucceededEvent +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.node_events import NodeRunResult def _build_succeeded_event() -> NodeRunSucceededEvent: return NodeRunSucceededEvent( id="execution-id", node_id="llm-node-id", - node_type=NodeType.LLM, + node_type=BuiltinNodeTypes.LLM, start_at=datetime.now(), node_run_result=NodeRunResult( status=WorkflowNodeExecutionStatus.SUCCEEDED, @@ -30,8 +30,9 @@ def test_deduct_quota_called_for_successful_llm_node() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" + node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() result_event = _build_succeeded_event() @@ -50,8 +51,9 @@ def test_deduct_quota_called_for_question_classifier_node() -> None: node = MagicMock() node.id = "question-classifier-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.QUESTION_CLASSIFIER + node.node_type = BuiltinNodeTypes.QUESTION_CLASSIFIER node.tenant_id = "tenant-id" + node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() result_event = _build_succeeded_event() @@ -70,8 +72,9 @@ def test_non_llm_node_is_ignored() -> None: node = MagicMock() node.id = "start-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.START + node.node_type = BuiltinNodeTypes.START node.tenant_id = "tenant-id" + node.require_dify_context.return_value.tenant_id = "tenant-id" node._model_instance = object() result_event = _build_succeeded_event() @@ -86,8 +89,9 @@ def test_quota_error_is_handled_in_layer() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" + node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() result_event = _build_succeeded_event() @@ -107,8 +111,9 @@ def test_quota_deduction_exceeded_aborts_workflow_immediately() -> None: node = MagicMock() node.id = "llm-node-id" node.execution_id = "execution-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.tenant_id = "tenant-id" + node.require_dify_context.return_value.tenant_id = "tenant-id" node.model_instance = object() node.graph_runtime_state = MagicMock() node.graph_runtime_state.stop_event = stop_event @@ -135,7 +140,7 @@ def test_quota_precheck_failure_aborts_workflow_immediately() -> None: node = MagicMock() node.id = "llm-node-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.model_instance = object() node.graph_runtime_state = MagicMock() node.graph_runtime_state.stop_event = stop_event @@ -161,7 +166,7 @@ def test_quota_precheck_passes_without_abort() -> None: node = MagicMock() node.id = "llm-node-id" - node.node_type = NodeType.LLM + node.node_type = BuiltinNodeTypes.LLM node.model_instance = object() node.graph_runtime_state = MagicMock() node.graph_runtime_state.stop_event = stop_event diff --git a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py index ade846df28..478a2b592e 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/layers/test_observability.py @@ -16,7 +16,7 @@ import pytest from opentelemetry.trace import StatusCode from core.app.workflow.layers.observability import ObservabilityLayer -from core.workflow.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes class TestObservabilityLayerInitialization: @@ -29,7 +29,7 @@ class TestObservabilityLayerInitialization: layer = ObservabilityLayer() assert not layer._is_disabled assert layer._tracer is not None - assert NodeType.TOOL in layer._parsers + assert BuiltinNodeTypes.TOOL in layer._parsers assert layer._default_parser is not None @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", False) @@ -39,7 +39,7 @@ class TestObservabilityLayerInitialization: layer = ObservabilityLayer() assert not layer._is_disabled assert layer._tracer is not None - assert NodeType.TOOL in layer._parsers + assert BuiltinNodeTypes.TOOL in layer._parsers assert layer._default_parser is not None @@ -117,7 +117,7 @@ class TestObservabilityLayerParserIntegration: attrs = spans[0].attributes assert attrs["node.id"] == mock_start_node.id assert attrs["node.execution_id"] == mock_start_node.execution_id - assert attrs["node.type"] == mock_start_node.node_type.value + assert attrs["node.type"] == mock_start_node.node_type @patch("core.app.workflow.layers.observability.dify_config.ENABLE_OTEL", True) @pytest.mark.usefixtures("mock_is_instrument_flag_enabled_false") @@ -144,7 +144,7 @@ class TestObservabilityLayerParserIntegration: self, tracer_provider_with_memory_exporter, memory_span_exporter, mock_llm_node, mock_result_event ): """Test that LLM parser is used for LLM nodes and extracts LLM-specific attributes.""" - from core.workflow.node_events.base import NodeRunResult + from dify_graph.node_events.base import NodeRunResult mock_result_event.node_run_result = NodeRunResult( inputs={}, @@ -182,7 +182,7 @@ class TestObservabilityLayerParserIntegration: self, tracer_provider_with_memory_exporter, memory_span_exporter, mock_retrieval_node, mock_result_event ): """Test that retrieval parser is used for retrieval nodes and extracts retrieval-specific attributes.""" - from core.workflow.node_events.base import NodeRunResult + from dify_graph.node_events.base import NodeRunResult mock_result_event.node_run_result = NodeRunResult( inputs={"query": "test query"}, @@ -210,7 +210,7 @@ class TestObservabilityLayerParserIntegration: self, tracer_provider_with_memory_exporter, memory_span_exporter, mock_start_node, mock_result_event ): """Test that result_event parameter allows parsers to extract inputs and outputs.""" - from core.workflow.node_events.base import NodeRunResult + from dify_graph.node_events.base import NodeRunResult mock_result_event.node_run_result = NodeRunResult( inputs={"input_key": "input_value"}, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py index c1fc4acd73..548c10ce8d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/orchestration/test_dispatcher.py @@ -5,18 +5,18 @@ from __future__ import annotations import queue from unittest import mock -from core.workflow.entities.pause_reason import SchedulingPause -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph_engine.event_management.event_handlers import EventHandler -from core.workflow.graph_engine.orchestration.dispatcher import Dispatcher -from core.workflow.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator -from core.workflow.graph_events import ( +from dify_graph.entities.pause_reason import SchedulingPause +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.graph_engine.event_management.event_handlers import EventHandler +from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher +from dify_graph.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator +from dify_graph.graph_events import ( GraphNodeEventBase, NodeRunPauseRequestedEvent, NodeRunStartedEvent, NodeRunSucceededEvent, ) -from core.workflow.node_events import NodeRunResult +from dify_graph.node_events import NodeRunResult from libs.datetime_utils import naive_utc_now @@ -26,7 +26,7 @@ def test_dispatcher_should_consume_remains_events_after_pause(): GraphNodeEventBase( id="test", node_id="test", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, ) ) event_handler = mock.Mock(spec=EventHandler) @@ -107,7 +107,7 @@ def _make_started_event() -> NodeRunStartedEvent: return NodeRunStartedEvent( id="start-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Test Node", start_at=naive_utc_now(), ) @@ -117,7 +117,7 @@ def _make_succeeded_event() -> NodeRunSucceededEvent: return NodeRunSucceededEvent( id="success-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Test Node", start_at=naive_utc_now(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), @@ -151,20 +151,20 @@ def test_dispatcher_drain_event_queue(): NodeRunStartedEvent( id="start-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, node_title="Code", start_at=naive_utc_now(), ), NodeRunPauseRequestedEvent( id="pause-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, reason=SchedulingPause(message="test pause"), ), NodeRunSucceededEvent( id="success-event", node_id="node-1", - node_type=NodeType.CODE, + node_type=BuiltinNodeTypes.CODE, start_at=naive_utc_now(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), ), diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_answer_end_with_text.py b/api/tests/unit_tests/core/workflow/graph_engine/test_answer_end_with_text.py index fd1e6fc6dc..7af6b26d87 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_answer_end_with_text.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_answer_end_with_text.py @@ -1,4 +1,4 @@ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py index b291f95e0f..fc0d22f739 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_auto_mock_system.py @@ -7,7 +7,8 @@ for workflows containing nodes that require third-party services. import pytest -from core.workflow.enums import NodeType +from dify_graph.enums import BuiltinNodeTypes +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from .test_table_runner import TableTestRunner, WorkflowTestCase @@ -199,22 +200,19 @@ def test_mock_config_builder(): def test_mock_factory_node_type_detection(): """Test that MockNodeFactory correctly identifies nodes to mock.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.runtime import GraphRuntimeState, VariablePool from .test_mock_factory import MockNodeFactory - graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", + graph_init_params = build_test_graph_init_params( workflow_id="test", graph_config={}, + tenant_id="test", + app_id="test", user_id="test", user_from=UserFrom.ACCOUNT, invoke_from=InvokeFrom.SERVICE_API, - call_depth=0, ) graph_runtime_state = GraphRuntimeState( variable_pool=VariablePool(environment_variables=[], conversation_variables=[], user_inputs={}), @@ -229,23 +227,23 @@ def test_mock_factory_node_type_detection(): ) # Test that third-party service nodes are identified for mocking - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) - assert factory.should_mock_node(NodeType.TOOL) - assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL) - assert factory.should_mock_node(NodeType.HTTP_REQUEST) - assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR) - assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.TOOL) + assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL) + assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST) + assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR) # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Test that non-service nodes are not mocked - assert not factory.should_mock_node(NodeType.START) - assert not factory.should_mock_node(NodeType.END) - assert not factory.should_mock_node(NodeType.IF_ELSE) - assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR) + assert not factory.should_mock_node(BuiltinNodeTypes.START) + assert not factory.should_mock_node(BuiltinNodeTypes.END) + assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE) + assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR) def test_custom_mock_handler(): @@ -309,11 +307,9 @@ def test_workflow_without_auto_mock(): def test_register_custom_mock_node(): """Test registering a custom mock implementation for a node type.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.nodes.template_transform import TemplateTransformNode - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.nodes.template_transform import TemplateTransformNode + from dify_graph.runtime import GraphRuntimeState, VariablePool from .test_mock_factory import MockNodeFactory @@ -323,15 +319,14 @@ def test_register_custom_mock_node(): # Custom mock implementation pass - graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", + graph_init_params = build_test_graph_init_params( workflow_id="test", graph_config={}, + tenant_id="test", + app_id="test", user_id="test", user_from=UserFrom.ACCOUNT, invoke_from=InvokeFrom.SERVICE_API, - call_depth=0, ) graph_runtime_state = GraphRuntimeState( variable_pool=VariablePool(environment_variables=[], conversation_variables=[], user_inputs={}), @@ -346,15 +341,15 @@ def test_register_custom_mock_node(): ) # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Unregister mock - factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM) - assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM) + assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Re-register custom mock - factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, MockTemplateTransformNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, MockTemplateTransformNode) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) def test_default_config_by_node_type(): @@ -363,7 +358,7 @@ def test_default_config_by_node_type(): # Set default config for all LLM nodes mock_config.set_default_config( - NodeType.LLM, + BuiltinNodeTypes.LLM, { "default_response": "Default LLM response for all nodes", "temperature": 0.7, @@ -372,23 +367,23 @@ def test_default_config_by_node_type(): # Set default config for all HTTP nodes mock_config.set_default_config( - NodeType.HTTP_REQUEST, + BuiltinNodeTypes.HTTP_REQUEST, { "default_status": 200, "default_timeout": 30, }, ) - llm_config = mock_config.get_default_config(NodeType.LLM) + llm_config = mock_config.get_default_config(BuiltinNodeTypes.LLM) assert llm_config["default_response"] == "Default LLM response for all nodes" assert llm_config["temperature"] == 0.7 - http_config = mock_config.get_default_config(NodeType.HTTP_REQUEST) + http_config = mock_config.get_default_config(BuiltinNodeTypes.HTTP_REQUEST) assert http_config["default_status"] == 200 assert http_config["default_timeout"] == 30 # Non-configured node type should return empty dict - tool_config = mock_config.get_default_config(NodeType.TOOL) + tool_config = mock_config.get_default_config(BuiltinNodeTypes.TOOL) assert tool_config == {} diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_basic_chatflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_basic_chatflow.py index b04643b78a..30acbdaf3d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_basic_chatflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_basic_chatflow.py @@ -1,4 +1,4 @@ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py b/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py index 6c3700ea2b..765c4deba3 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_command_system.py @@ -3,24 +3,23 @@ import time from unittest.mock import MagicMock -from core.app.entities.app_invoke_entities import InvokeFrom -from core.workflow.entities.graph_init_params import GraphInitParams -from core.workflow.entities.pause_reason import SchedulingPause -from core.workflow.graph import Graph -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_engine.entities.commands import ( +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams +from dify_graph.entities.pause_reason import SchedulingPause +from dify_graph.graph import Graph +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_engine.entities.commands import ( AbortCommand, CommandType, PauseCommand, UpdateVariablesCommand, VariableUpdate, ) -from core.workflow.graph_events import GraphRunAbortedEvent, GraphRunPausedEvent, GraphRunStartedEvent -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.variables import IntegerVariable, StringVariable -from models.enums import UserFrom +from dify_graph.graph_events import GraphRunAbortedEvent, GraphRunPausedEvent, GraphRunStartedEvent +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.variables import IntegerVariable, StringVariable def test_abort_command(): @@ -41,13 +40,17 @@ def test_abort_command(): id="start", config={"id": "start", "data": {"title": "start", "variables": []}}, graph_init_params=GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.DEBUGGER, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.DEBUGGER, + } + }, call_depth=0, ), graph_runtime_state=shared_runtime_state, @@ -99,7 +102,7 @@ def test_redis_channel_serialization(): mock_redis.pipeline.return_value.__enter__ = MagicMock(return_value=mock_pipeline) mock_redis.pipeline.return_value.__exit__ = MagicMock(return_value=None) - from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel + from dify_graph.graph_engine.command_channels.redis_channel import RedisChannel # Create channel with a specific key channel = RedisChannel(mock_redis, channel_key="workflow:123:commands") @@ -151,13 +154,17 @@ def test_pause_command(): id="start", config={"id": "start", "data": {"title": "start", "variables": []}}, graph_init_params=GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.DEBUGGER, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.DEBUGGER, + } + }, call_depth=0, ), graph_runtime_state=shared_runtime_state, @@ -207,13 +214,17 @@ def test_update_variables_command_updates_pool(): id="start", config={"id": "start", "data": {"title": "start", "variables": []}}, graph_init_params=GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.DEBUGGER, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.DEBUGGER, + } + }, call_depth=0, ), graph_runtime_state=shared_runtime_state, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_complex_branch_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_complex_branch_workflow.py index 96926797ec..3a9a0b18bc 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_complex_branch_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_complex_branch_workflow.py @@ -7,7 +7,7 @@ This test suite validates the behavior of a workflow that: 3. Handles multiple answer nodes with different outputs """ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py index ee944c8e3e..76bf179f33 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_conditional_streaming_vs_template_workflow.py @@ -6,10 +6,10 @@ This test validates that: - When blocking != 1: NodeRunStreamChunkEvent present (direct LLM to End output) """ -from core.workflow.enums import NodeType -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_events import ( +from dify_graph.enums import BuiltinNodeTypes +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_events import ( GraphRunSucceededEvent, NodeRunStartedEvent, NodeRunStreamChunkEvent, @@ -74,7 +74,11 @@ def test_streaming_output_with_blocking_equals_one(): # Find indices of first LLM success event and first stream chunk event llm2_start_index = next( - (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM), + ( + i + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM + ), -1, ) first_chunk_index = next( @@ -96,16 +100,16 @@ def test_streaming_output_with_blocking_equals_one(): # Check all Template's NodeRunStreamChunkEvent should has same id with Template's NodeRunStartedEvent start_events = [ - e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.TEMPLATE_TRANSFORM + e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM ] - template_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.TEMPLATE_TRANSFORM] + template_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.TEMPLATE_TRANSFORM] assert len(template_chunk_events) == 1, f"Expected 1 template chunk event, but got {len(template_chunk_events)}" assert all(e.id in [se.id for se in start_events] for e in template_chunk_events), ( "Expected all Template chunk events to have same id with Template's NodeRunStartedEvent" ) # Check that NodeRunStreamChunkEvent contains '\n' is from the End node - end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END] + end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END] assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}" newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"] assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}" @@ -168,7 +172,11 @@ def test_streaming_output_with_blocking_not_equals_one(): # Find indices of first LLM success event and first stream chunk event llm2_start_index = next( - (i for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM), + ( + i + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM + ), -1, ) first_chunk_index = next( @@ -194,15 +202,15 @@ def test_streaming_output_with_blocking_not_equals_one(): assert all(e.id == start_event.id for e in query_chunk_events), "Expected all query chunk events to have same id" # Check all LLM's NodeRunStreamChunkEvent should be from LLM nodes - start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.LLM] - llm_chunk_events = [e for e in stream_chunk_events if e.node_type == NodeType.LLM] + start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.LLM] + llm_chunk_events = [e for e in stream_chunk_events if e.node_type == BuiltinNodeTypes.LLM] llm_node_ids = {se.node_id for se in start_events} assert all(e.node_id in llm_node_ids for e in llm_chunk_events), ( "Expected all LLM chunk events to be from LLM nodes" ) # Check that NodeRunStreamChunkEvent contains '\n' is from the End node - end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.END] + end_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.END] assert len(end_events) == 1, f"Expected 1 end event, but got {len(end_events)}" newline_chunk_events = [e for e in stream_chunk_events if e.chunk == "\n"] assert len(newline_chunk_events) == 1, f"Expected 1 newline chunk event, but got {len(newline_chunk_events)}" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py index bf8034487c..778dad5952 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_dispatcher_pause_drain.py @@ -1,10 +1,10 @@ import queue from datetime import datetime -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph_engine.orchestration.dispatcher import Dispatcher -from core.workflow.graph_events import NodeRunSucceededEvent -from core.workflow.node_events import NodeRunResult +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.graph_engine.orchestration.dispatcher import Dispatcher +from dify_graph.graph_events import NodeRunSucceededEvent +from dify_graph.node_events import NodeRunResult class StubExecutionCoordinator: @@ -51,7 +51,7 @@ def test_dispatcher_drains_events_when_paused() -> None: event = NodeRunSucceededEvent( id="exec-1", node_id="node-1", - node_type=NodeType.START, + node_type=BuiltinNodeTypes.START, start_at=datetime.utcnow(), node_run_result=NodeRunResult(status=WorkflowNodeExecutionStatus.SUCCEEDED), ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_end_node_without_value_type.py b/api/tests/unit_tests/core/workflow/graph_engine/test_end_node_without_value_type.py index b1380cd6d2..c87dc75b95 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_end_node_without_value_type.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_end_node_without_value_type.py @@ -6,7 +6,7 @@ field is missing from the output configuration, ensuring backward compatibility with older workflow definitions. """ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py b/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py index 53de8908a8..35406997ed 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_execution_coordinator.py @@ -4,11 +4,11 @@ from unittest.mock import MagicMock import pytest -from core.workflow.graph_engine.command_processing.command_processor import CommandProcessor -from core.workflow.graph_engine.domain.graph_execution import GraphExecution -from core.workflow.graph_engine.graph_state_manager import GraphStateManager -from core.workflow.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator -from core.workflow.graph_engine.worker_management.worker_pool import WorkerPool +from dify_graph.graph_engine.command_processing.command_processor import CommandProcessor +from dify_graph.graph_engine.domain.graph_execution import GraphExecution +from dify_graph.graph_engine.graph_state_manager import GraphStateManager +from dify_graph.graph_engine.orchestration.execution_coordinator import ExecutionCoordinator +from dify_graph.graph_engine.worker_management.worker_pool import WorkerPool def _build_coordinator(graph_execution: GraphExecution) -> tuple[ExecutionCoordinator, MagicMock, MagicMock]: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py index 5a55d7086e..4e13177d2b 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_engine.py @@ -10,15 +10,15 @@ import time from hypothesis import HealthCheck, given, settings from hypothesis import strategies as st -from core.workflow.enums import ErrorStrategy -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_events import ( +from dify_graph.entities.base_node_data import DefaultValue, DefaultValueType +from dify_graph.enums import ErrorStrategy +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_events import ( GraphRunPartialSucceededEvent, GraphRunStartedEvent, GraphRunSucceededEvent, ) -from core.workflow.nodes.base.entities import DefaultValue, DefaultValueType # Import the test framework from the new module from .test_mock_config import MockConfigBuilder @@ -455,7 +455,7 @@ def test_if_else_workflow_property_diverse_inputs(query_input): # Tests for the Layer system def test_layer_system_basic(): """Test basic layer functionality with DebugLoggingLayer.""" - from core.workflow.graph_engine.layers import DebugLoggingLayer + from dify_graph.graph_engine.layers import DebugLoggingLayer runner = WorkflowRunner() @@ -495,7 +495,7 @@ def test_layer_system_basic(): def test_layer_chaining(): """Test chaining multiple layers.""" - from core.workflow.graph_engine.layers import DebugLoggingLayer, GraphEngineLayer + from dify_graph.graph_engine.layers import DebugLoggingLayer, GraphEngineLayer # Create a custom test layer class TestLayer(GraphEngineLayer): @@ -549,7 +549,7 @@ def test_layer_chaining(): def test_layer_error_handling(): """Test that layer errors don't crash the engine.""" - from core.workflow.graph_engine.layers import GraphEngineLayer + from dify_graph.graph_engine.layers import GraphEngineLayer # Create a layer that throws errors class FaultyLayer(GraphEngineLayer): @@ -591,7 +591,7 @@ def test_layer_error_handling(): def test_event_sequence_validation(): """Test the new event sequence validation feature.""" - from core.workflow.graph_events import NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent + from dify_graph.graph_events import NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent runner = TableTestRunner() @@ -678,7 +678,7 @@ def test_event_sequence_validation(): def test_event_sequence_validation_with_table_tests(): """Test event sequence validation with table-driven tests.""" - from core.workflow.graph_events import NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent + from dify_graph.graph_events import NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent runner = TableTestRunner() diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py index 6385b0b91f..255784b77d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_execution_serialization.py @@ -6,13 +6,13 @@ import json from collections import deque from unittest.mock import MagicMock -from core.workflow.enums import NodeExecutionType, NodeState, NodeType -from core.workflow.graph_engine.domain import GraphExecution -from core.workflow.graph_engine.response_coordinator import ResponseStreamCoordinator -from core.workflow.graph_engine.response_coordinator.path import Path -from core.workflow.graph_engine.response_coordinator.session import ResponseSession -from core.workflow.graph_events import NodeRunStreamChunkEvent -from core.workflow.nodes.base.template import Template, TextSegment, VariableSegment +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState +from dify_graph.graph_engine.domain import GraphExecution +from dify_graph.graph_engine.response_coordinator import ResponseStreamCoordinator +from dify_graph.graph_engine.response_coordinator.path import Path +from dify_graph.graph_engine.response_coordinator.session import ResponseSession +from dify_graph.graph_events import NodeRunStreamChunkEvent +from dify_graph.nodes.base.template import Template, TextSegment, VariableSegment class CustomGraphExecutionError(Exception): @@ -101,7 +101,9 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No class DummyNode: def __init__(self, node_id: str, template: Template, execution_type: NodeExecutionType) -> None: self.id = node_id - self.node_type = NodeType.ANSWER if execution_type == NodeExecutionType.RESPONSE else NodeType.LLM + self.node_type = ( + BuiltinNodeTypes.ANSWER if execution_type == NodeExecutionType.RESPONSE else BuiltinNodeTypes.LLM + ) self.execution_type = execution_type self.state = NodeState.UNKNOWN self.title = node_id @@ -160,7 +162,7 @@ def test_response_stream_coordinator_serialization_round_trip(monkeypatch) -> No event = NodeRunStreamChunkEvent( id="exec-1", node_id="response-1", - node_type=NodeType.ANSWER, + node_type=BuiltinNodeTypes.ANSWER, selector=["node-source", "text"], chunk="chunk-1", is_final=False, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_state_snapshot.py b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_state_snapshot.py index 65d34c2009..d54f0be190 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_graph_state_snapshot.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_graph_state_snapshot.py @@ -1,26 +1,27 @@ import time from collections.abc import Mapping -from core.model_runtime.entities.llm_entities import LLMMode -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.enums import NodeState -from core.workflow.graph import Graph -from core.workflow.graph_engine.graph_state_manager import GraphStateManager -from core.workflow.graph_engine.ready_queue import InMemoryReadyQueue -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.llm.entities import ( +from dify_graph.entities import GraphInitParams +from dify_graph.enums import NodeState +from dify_graph.graph import Graph +from dify_graph.graph_engine.graph_state_manager import GraphStateManager +from dify_graph.graph_engine.ready_queue import InMemoryReadyQueue +from dify_graph.model_runtime.entities.llm_entities import LLMMode +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig from .test_mock_nodes import MockLLMNode @@ -73,11 +74,11 @@ def _build_llm_node( def _build_graph(runtime_state: GraphRuntimeState) -> Graph: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_multi_branch.py b/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_multi_branch.py index b117b26b4c..538f53c603 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_multi_branch.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_multi_branch.py @@ -4,10 +4,8 @@ from collections.abc import Iterable from unittest import mock from unittest.mock import MagicMock -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.graph import Graph -from core.workflow.graph_events import ( +from dify_graph.graph import Graph +from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, GraphRunSucceededEvent, @@ -16,25 +14,27 @@ from core.workflow.graph_events import ( NodeRunStreamChunkEvent, NodeRunSucceededEvent, ) -from core.workflow.graph_events.node import NodeRunHumanInputFormFilledEvent -from core.workflow.nodes.base.entities import OutputVariableEntity, OutputVariableType -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.llm.entities import ( +from dify_graph.graph_events.node import NodeRunHumanInputFormFilledEvent +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.base.entities import OutputVariableEntity, OutputVariableType +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import HumanInputFormEntity, HumanInputFormRepository -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import HumanInputFormEntity, HumanInputFormRepository +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig from .test_mock_nodes import MockLLMNode @@ -47,11 +47,11 @@ def _build_branching_graph( graph_runtime_state: GraphRuntimeState | None = None, ) -> tuple[Graph, GraphRuntimeState]: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_single_branch.py b/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_single_branch.py index 45505909ea..36bba6deb6 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_single_branch.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_human_input_pause_single_branch.py @@ -3,10 +3,8 @@ import time from unittest import mock from unittest.mock import MagicMock -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.graph import Graph -from core.workflow.graph_events import ( +from dify_graph.graph import Graph +from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, GraphRunSucceededEvent, @@ -15,25 +13,27 @@ from core.workflow.graph_events import ( NodeRunStreamChunkEvent, NodeRunSucceededEvent, ) -from core.workflow.graph_events.node import NodeRunHumanInputFormFilledEvent -from core.workflow.nodes.base.entities import OutputVariableEntity, OutputVariableType -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.llm.entities import ( +from dify_graph.graph_events.node import NodeRunHumanInputFormFilledEvent +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.base.entities import OutputVariableEntity, OutputVariableType +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import HumanInputFormEntity, HumanInputFormRepository -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import HumanInputFormEntity, HumanInputFormRepository +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig from .test_mock_nodes import MockLLMNode @@ -46,11 +46,11 @@ def _build_llm_human_llm_graph( graph_runtime_state: GraphRuntimeState | None = None, ) -> tuple[Graph, GraphRuntimeState]: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_if_else_streaming.py b/api/tests/unit_tests/core/workflow/graph_engine/test_if_else_streaming.py index f33d37e8ff..8da179c15e 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_if_else_streaming.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_if_else_streaming.py @@ -1,34 +1,34 @@ import time from unittest import mock -from core.model_runtime.entities.llm_entities import LLMMode -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.graph import Graph -from core.workflow.graph_events import ( +from dify_graph.graph import Graph +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent, ) -from core.workflow.nodes.base.entities import OutputVariableEntity, OutputVariableType -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.if_else.entities import IfElseNodeData -from core.workflow.nodes.if_else.if_else_node import IfElseNode -from core.workflow.nodes.llm.entities import ( +from dify_graph.model_runtime.entities.llm_entities import LLMMode +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.base.entities import OutputVariableEntity, OutputVariableType +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.if_else.entities import IfElseNodeData +from dify_graph.nodes.if_else.if_else_node import IfElseNode +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from core.workflow.utils.condition.entities import Condition +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from dify_graph.utils.condition.entities import Condition +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig from .test_mock_nodes import MockLLMNode @@ -37,15 +37,10 @@ from .test_table_runner import TableTestRunner, WorkflowTestCase def _build_if_else_graph(branch_value: str, mock_config: MockConfig) -> tuple[Graph, GraphRuntimeState]: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", - workflow_id="workflow", + graph_init_params = build_test_graph_init_params( graph_config=graph_config, - user_id="user", user_from="account", invoke_from="debugger", - call_depth=0, ) variable_pool = VariablePool( diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_loop_contains_answer.py b/api/tests/unit_tests/core/workflow/graph_engine/test_loop_contains_answer.py index 3e21a5b44d..733fd53bc8 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_loop_contains_answer.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_loop_contains_answer.py @@ -5,7 +5,7 @@ This test validates the behavior of a loop containing an answer node inside the loop that may produce output errors. """ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunLoopNextEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_loop_with_tool.py b/api/tests/unit_tests/core/workflow/graph_engine/test_loop_with_tool.py index d88c1d9f9e..6ff2722f78 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_loop_with_tool.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_loop_with_tool.py @@ -1,4 +1,4 @@ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunLoopNextEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py index 5ceb8dd7f7..8a4649693d 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_config.py @@ -11,8 +11,6 @@ from collections.abc import Callable from dataclasses import dataclass, field from typing import Any -from core.workflow.enums import NodeType - @dataclass class NodeMockConfig: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py index b862cbe89e..93010eea54 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_factory.py @@ -5,12 +5,12 @@ This module provides a MockNodeFactory that automatically detects and mocks node requiring external services (LLM, Agent, Tool, Knowledge Retrieval, HTTP Request). """ -from collections.abc import Mapping from typing import TYPE_CHECKING, Any -from core.app.workflow.node_factory import DifyNodeFactory -from core.workflow.enums import NodeType -from core.workflow.nodes.base.node import Node +from core.workflow.node_factory import DifyNodeFactory +from dify_graph.entities.graph_config import NodeConfigDict, NodeConfigDictAdapter +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base.node import Node from .test_mock_nodes import ( MockAgentNode, @@ -28,8 +28,8 @@ from .test_mock_nodes import ( ) if TYPE_CHECKING: - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState from .test_mock_config import MockConfig @@ -61,63 +61,51 @@ class MockNodeFactory(DifyNodeFactory): # Map of node types that should be mocked self._mock_node_types = { - NodeType.LLM: MockLLMNode, - NodeType.AGENT: MockAgentNode, - NodeType.TOOL: MockToolNode, - NodeType.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, - NodeType.HTTP_REQUEST: MockHttpRequestNode, - NodeType.QUESTION_CLASSIFIER: MockQuestionClassifierNode, - NodeType.PARAMETER_EXTRACTOR: MockParameterExtractorNode, - NodeType.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, - NodeType.ITERATION: MockIterationNode, - NodeType.LOOP: MockLoopNode, - NodeType.TEMPLATE_TRANSFORM: MockTemplateTransformNode, - NodeType.CODE: MockCodeNode, + BuiltinNodeTypes.LLM: MockLLMNode, + BuiltinNodeTypes.AGENT: MockAgentNode, + BuiltinNodeTypes.TOOL: MockToolNode, + BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL: MockKnowledgeRetrievalNode, + BuiltinNodeTypes.HTTP_REQUEST: MockHttpRequestNode, + BuiltinNodeTypes.QUESTION_CLASSIFIER: MockQuestionClassifierNode, + BuiltinNodeTypes.PARAMETER_EXTRACTOR: MockParameterExtractorNode, + BuiltinNodeTypes.DOCUMENT_EXTRACTOR: MockDocumentExtractorNode, + BuiltinNodeTypes.ITERATION: MockIterationNode, + BuiltinNodeTypes.LOOP: MockLoopNode, + BuiltinNodeTypes.TEMPLATE_TRANSFORM: MockTemplateTransformNode, + BuiltinNodeTypes.CODE: MockCodeNode, } - def create_node(self, node_config: Mapping[str, Any]) -> Node: + def create_node(self, node_config: dict[str, Any] | NodeConfigDict) -> Node: """ Create a node instance, using mock implementations for third-party service nodes. :param node_config: Node configuration dictionary :return: Node instance (real or mocked) """ - # Get node type from config - node_data = node_config.get("data", {}) - node_type_str = node_data.get("type") - - if not node_type_str: - # Fall back to parent implementation for nodes without type - return super().create_node(node_config) - - try: - node_type = NodeType(node_type_str) - except ValueError: - # Unknown node type, use parent implementation - return super().create_node(node_config) + typed_node_config = NodeConfigDictAdapter.validate_python(node_config) + node_data = typed_node_config["data"] + node_type = node_data.type # Check if this node type should be mocked if node_type in self._mock_node_types: - node_id = node_config.get("id") - if not node_id: - raise ValueError("Node config missing id") + node_id = typed_node_config["id"] # Create mock node instance mock_class = self._mock_node_types[node_type] - if node_type == NodeType.CODE: + if node_type == BuiltinNodeTypes.CODE: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, code_executor=self._code_executor, code_limits=self._code_limits, ) - elif node_type == NodeType.HTTP_REQUEST: + elif node_type == BuiltinNodeTypes.HTTP_REQUEST: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -126,10 +114,14 @@ class MockNodeFactory(DifyNodeFactory): tool_file_manager_factory=self._http_request_tool_file_manager_factory, file_manager=self._http_request_file_manager, ) - elif node_type in {NodeType.LLM, NodeType.QUESTION_CLASSIFIER, NodeType.PARAMETER_EXTRACTOR}: + elif node_type in { + BuiltinNodeTypes.LLM, + BuiltinNodeTypes.QUESTION_CLASSIFIER, + BuiltinNodeTypes.PARAMETER_EXTRACTOR, + }: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -139,7 +131,7 @@ class MockNodeFactory(DifyNodeFactory): else: mock_instance = mock_class( id=node_id, - config=node_config, + config=typed_node_config, graph_init_params=self.graph_init_params, graph_runtime_state=self.graph_runtime_state, mock_config=self.mock_config, @@ -148,7 +140,7 @@ class MockNodeFactory(DifyNodeFactory): return mock_instance # For non-mocked node types, use parent implementation - return super().create_node(node_config) + return super().create_node(typed_node_config) def should_mock_node(self, node_type: NodeType) -> bool: """ diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py index aae4de9a27..3e4247f33f 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_iteration_simple.py @@ -2,34 +2,31 @@ Simple test to verify MockNodeFactory works with iteration nodes. """ -import sys -from pathlib import Path - -# Add api directory to path -api_dir = Path(__file__).parent.parent.parent.parent.parent.parent -sys.path.insert(0, str(api_dir)) - -from core.workflow.enums import NodeType +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfigBuilder from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory def test_mock_factory_registers_iteration_node(): """Test that MockNodeFactory has iteration node registered.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create a MockNodeFactory instance graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", workflow_id="test", graph_config={"nodes": [], "edges": []}, - user_id="test", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test", + "app_id": "test", + "user_id": "test", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.SERVICE_API, + } + }, call_depth=0, ) graph_runtime_state = GraphRuntimeState( @@ -45,30 +42,29 @@ def test_mock_factory_registers_iteration_node(): ) # Check that iteration node is registered - assert NodeType.ITERATION in factory._mock_node_types + assert BuiltinNodeTypes.ITERATION in factory._mock_node_types print("✓ Iteration node is registered in MockNodeFactory") # Check that loop node is registered - assert NodeType.LOOP in factory._mock_node_types + assert BuiltinNodeTypes.LOOP in factory._mock_node_types print("✓ Loop node is registered in MockNodeFactory") # Check the class types from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockIterationNode, MockLoopNode - assert factory._mock_node_types[NodeType.ITERATION] == MockIterationNode + assert factory._mock_node_types[BuiltinNodeTypes.ITERATION] == MockIterationNode print("✓ Iteration node maps to MockIterationNode class") - assert factory._mock_node_types[NodeType.LOOP] == MockLoopNode + assert factory._mock_node_types[BuiltinNodeTypes.LOOP] == MockLoopNode print("✓ Loop node maps to MockLoopNode class") def test_mock_iteration_node_preserves_config(): """Test that MockIterationNode preserves mock configuration.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockIterationNode # Create mock config @@ -76,13 +72,17 @@ def test_mock_iteration_node_preserves_config(): # Create minimal graph init params graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", workflow_id="test", graph_config={"nodes": [], "edges": []}, - user_id="test", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test", + "app_id": "test", + "user_id": "test", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.SERVICE_API, + } + }, call_depth=0, ) @@ -127,10 +127,9 @@ def test_mock_iteration_node_preserves_config(): def test_mock_loop_node_preserves_config(): """Test that MockLoopNode preserves mock configuration.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockLoopNode # Create mock config @@ -138,13 +137,17 @@ def test_mock_loop_node_preserves_config(): # Create minimal graph init params graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", workflow_id="test", graph_config={"nodes": [], "edges": []}, - user_id="test", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test", + "app_id": "test", + "user_id": "test", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.SERVICE_API, + } + }, call_depth=0, ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py index 5aed463a45..454263bef9 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes.py @@ -11,28 +11,45 @@ from typing import TYPE_CHECKING, Any, Optional from unittest.mock import MagicMock from core.model_manager import ModelInstance -from core.model_runtime.entities.llm_entities import LLMUsage -from core.workflow.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus -from core.workflow.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent from core.workflow.nodes.agent import AgentNode -from core.workflow.nodes.code import CodeNode -from core.workflow.nodes.document_extractor import DocumentExtractorNode -from core.workflow.nodes.http_request import HttpRequestNode -from core.workflow.nodes.knowledge_retrieval import KnowledgeRetrievalNode -from core.workflow.nodes.llm import LLMNode -from core.workflow.nodes.llm.protocols import CredentialsProvider, ModelFactory -from core.workflow.nodes.parameter_extractor import ParameterExtractorNode -from core.workflow.nodes.question_classifier import QuestionClassifierNode -from core.workflow.nodes.template_transform import TemplateTransformNode -from core.workflow.nodes.tool import ToolNode +from core.workflow.nodes.knowledge_retrieval.knowledge_retrieval_node import KnowledgeRetrievalNode +from dify_graph.enums import WorkflowNodeExecutionMetadataKey, WorkflowNodeExecutionStatus +from dify_graph.model_runtime.entities.llm_entities import LLMUsage +from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent +from dify_graph.nodes.code import CodeNode +from dify_graph.nodes.document_extractor import DocumentExtractorNode +from dify_graph.nodes.http_request import HttpRequestNode +from dify_graph.nodes.llm import LLMNode +from dify_graph.nodes.llm.protocols import CredentialsProvider, ModelFactory, TemplateRenderer +from dify_graph.nodes.parameter_extractor import ParameterExtractorNode +from dify_graph.nodes.protocols import HttpClientProtocol, ToolFileManagerProtocol +from dify_graph.nodes.question_classifier import QuestionClassifierNode +from dify_graph.nodes.template_transform import TemplateTransformNode +from dify_graph.nodes.template_transform.template_renderer import ( + Jinja2TemplateRenderer, + TemplateRenderError, +) +from dify_graph.nodes.tool import ToolNode if TYPE_CHECKING: - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState from .test_mock_config import MockConfig +class _TestJinja2Renderer(Jinja2TemplateRenderer): + """Simple Jinja2 renderer for tests (avoids code executor).""" + + def render_template(self, template: str, variables: Mapping[str, Any]) -> str: + from jinja2 import Template as _Jinja2Template + + try: + return _Jinja2Template(template).render(**variables) + except Exception as exc: # pragma: no cover - pass through as contract error + raise TemplateRenderError(str(exc)) from exc + + class MockNodeMixin: """Mixin providing common mock functionality.""" @@ -49,6 +66,28 @@ class MockNodeMixin: kwargs.setdefault("credentials_provider", MagicMock(spec=CredentialsProvider)) kwargs.setdefault("model_factory", MagicMock(spec=ModelFactory)) kwargs.setdefault("model_instance", MagicMock(spec=ModelInstance)) + # LLM-like nodes now require an http_client; provide a mock by default for tests. + kwargs.setdefault("http_client", MagicMock(spec=HttpClientProtocol)) + if isinstance(self, (LLMNode, QuestionClassifierNode)): + kwargs.setdefault("template_renderer", MagicMock(spec=TemplateRenderer)) + + # Ensure TemplateTransformNode receives a renderer now required by constructor + if isinstance(self, TemplateTransformNode): + kwargs.setdefault("template_renderer", _TestJinja2Renderer()) + + # Provide default tool_file_manager_factory for ToolNode subclasses + from dify_graph.nodes.tool import ToolNode as _ToolNode # local import to avoid cycles + + if isinstance(self, _ToolNode): + kwargs.setdefault("tool_file_manager_factory", MagicMock(spec=ToolFileManagerProtocol)) + + if isinstance(self, AgentNode): + presentation_provider = MagicMock() + presentation_provider.get_icon.return_value = None + kwargs.setdefault("strategy_resolver", MagicMock()) + kwargs.setdefault("presentation_provider", presentation_provider) + kwargs.setdefault("runtime_support", MagicMock()) + kwargs.setdefault("message_transformer", MagicMock()) super().__init__( id=id, @@ -557,8 +596,8 @@ class MockDocumentExtractorNode(MockNodeMixin, DocumentExtractorNode): ) -from core.workflow.nodes.iteration import IterationNode -from core.workflow.nodes.loop import LoopNode +from dify_graph.nodes.iteration import IterationNode +from dify_graph.nodes.loop import LoopNode class MockIterationNode(MockNodeMixin, IterationNode): @@ -572,24 +611,20 @@ class MockIterationNode(MockNodeMixin, IterationNode): def _create_graph_engine(self, index: int, item: Any): """Create a graph engine with MockNodeFactory instead of DifyNodeFactory.""" # Import dependencies - from core.workflow.entities import GraphInitParams - from core.workflow.graph import Graph - from core.workflow.graph_engine import GraphEngine, GraphEngineConfig - from core.workflow.graph_engine.command_channels import InMemoryChannel - from core.workflow.runtime import GraphRuntimeState + from dify_graph.entities import GraphInitParams + from dify_graph.graph import Graph + from dify_graph.graph_engine import GraphEngine, GraphEngineConfig + from dify_graph.graph_engine.command_channels import InMemoryChannel + from dify_graph.runtime import GraphRuntimeState # Import our MockNodeFactory instead of DifyNodeFactory from .test_mock_factory import MockNodeFactory # Create GraphInitParams from node attributes graph_init_params = GraphInitParams( - tenant_id=self.tenant_id, - app_id=self.app_id, workflow_id=self.workflow_id, graph_config=self.graph_config, - user_id=self.user_id, - user_from=self.user_from.value, - invoke_from=self.invoke_from.value, + run_context=self.run_context, call_depth=self.workflow_call_depth, ) @@ -621,7 +656,7 @@ class MockIterationNode(MockNodeMixin, IterationNode): ) if not iteration_graph: - from core.workflow.nodes.iteration.exc import IterationGraphNotFoundError + from dify_graph.nodes.iteration.exc import IterationGraphNotFoundError raise IterationGraphNotFoundError("iteration graph not found") @@ -648,24 +683,20 @@ class MockLoopNode(MockNodeMixin, LoopNode): def _create_graph_engine(self, start_at, root_node_id: str): """Create a graph engine with MockNodeFactory instead of DifyNodeFactory.""" # Import dependencies - from core.workflow.entities import GraphInitParams - from core.workflow.graph import Graph - from core.workflow.graph_engine import GraphEngine, GraphEngineConfig - from core.workflow.graph_engine.command_channels import InMemoryChannel - from core.workflow.runtime import GraphRuntimeState + from dify_graph.entities import GraphInitParams + from dify_graph.graph import Graph + from dify_graph.graph_engine import GraphEngine, GraphEngineConfig + from dify_graph.graph_engine.command_channels import InMemoryChannel + from dify_graph.runtime import GraphRuntimeState # Import our MockNodeFactory instead of DifyNodeFactory from .test_mock_factory import MockNodeFactory # Create GraphInitParams from node attributes graph_init_params = GraphInitParams( - tenant_id=self.tenant_id, - app_id=self.app_id, workflow_id=self.workflow_id, graph_config=self.graph_config, - user_id=self.user_id, - user_from=self.user_from.value, - invoke_from=self.invoke_from.value, + run_context=self.run_context, call_depth=self.workflow_call_depth, ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py index 6c4178dfed..a8398e8f79 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_nodes_template_code.py @@ -6,8 +6,9 @@ to ensure they work correctly with the TableTestRunner. """ from configs import dify_config -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.nodes.code.limits import CodeNodeLimits +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.nodes.code.limits import CodeNodeLimits from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory from tests.unit_tests.core.workflow.graph_engine.test_mock_nodes import MockCodeNode, MockTemplateTransformNode @@ -39,18 +40,22 @@ class TestMockTemplateTransformNode: def test_mock_template_transform_node_default_output(self): """Test that MockTemplateTransformNode processes templates with Jinja2.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -98,18 +103,22 @@ class TestMockTemplateTransformNode: def test_mock_template_transform_node_custom_output(self): """Test that MockTemplateTransformNode returns custom configured output.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -158,18 +167,22 @@ class TestMockTemplateTransformNode: def test_mock_template_transform_node_error_simulation(self): """Test that MockTemplateTransformNode can simulate errors.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -215,19 +228,23 @@ class TestMockTemplateTransformNode: def test_mock_template_transform_node_with_variables(self): """Test that MockTemplateTransformNode processes templates with variables.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from core.workflow.variables import StringVariable + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool + from dify_graph.variables import StringVariable # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -281,18 +298,22 @@ class TestMockCodeNode: def test_mock_code_node_default_output(self): """Test that MockCodeNode returns default output.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -343,18 +364,22 @@ class TestMockCodeNode: def test_mock_code_node_with_output_schema(self): """Test that MockCodeNode generates outputs based on schema.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -413,18 +438,22 @@ class TestMockCodeNode: def test_mock_code_node_custom_output(self): """Test that MockCodeNode returns custom configured output.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -485,18 +514,22 @@ class TestMockNodeFactory: def test_code_and_template_nodes_mocked_by_default(self): """Test that CODE and TEMPLATE_TRANSFORM nodes are mocked by default (they require SSRF proxy).""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -517,27 +550,31 @@ class TestMockNodeFactory: ) # Verify that CODE and TEMPLATE_TRANSFORM ARE mocked by default (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Verify that other third-party service nodes ARE also mocked by default - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) def test_factory_creates_mock_template_transform_node(self): """Test that MockNodeFactory creates MockTemplateTransformNode for template-transform type.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -573,22 +610,26 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockTemplateTransformNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) def test_factory_creates_mock_code_node(self): """Test that MockNodeFactory creates MockCodeNode for code type.""" - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool # Create test parameters graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config={}, - user_id="test_user", - user_from="account", - invoke_from="debugger", + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": "account", + "invoke_from": "debugger", + } + }, call_depth=0, ) @@ -626,4 +667,4 @@ class TestMockNodeFactory: # Verify the correct mock type was created assert isinstance(node, MockCodeNode) - assert factory.should_mock_node(NodeType.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py index 1b781545f5..5b35b3310a 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_mock_simple.py @@ -3,13 +3,9 @@ Simple test to validate the auto-mock system without external dependencies. """ import sys -from pathlib import Path -# Add api directory to path -api_dir = Path(__file__).parent.parent.parent.parent.parent.parent -sys.path.insert(0, str(api_dir)) - -from core.workflow.enums import NodeType +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY +from dify_graph.enums import BuiltinNodeTypes from tests.unit_tests.core.workflow.graph_engine.test_mock_config import MockConfig, MockConfigBuilder, NodeMockConfig from tests.unit_tests.core.workflow.graph_engine.test_mock_factory import MockNodeFactory @@ -68,8 +64,8 @@ def test_mock_config_operations(): assert error_config.error == "Test error" # Test default configs by node type - config.set_default_config(NodeType.LLM, {"temperature": 0.7}) - llm_config = config.get_default_config(NodeType.LLM) + config.set_default_config(BuiltinNodeTypes.LLM, {"temperature": 0.7}) + llm_config = config.get_default_config(BuiltinNodeTypes.LLM) assert llm_config == {"temperature": 0.7} print("✓ MockConfig operations test passed") @@ -101,21 +97,24 @@ def test_node_mock_config(): def test_mock_factory_detection(): """Test MockNodeFactory node type detection.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool print("Testing MockNodeFactory detection...") graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", workflow_id="test", graph_config={}, - user_id="test", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test", + "app_id": "test", + "user_id": "test", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.SERVICE_API, + } + }, call_depth=0, ) graph_runtime_state = GraphRuntimeState( @@ -131,44 +130,47 @@ def test_mock_factory_detection(): ) # Test that third-party service nodes are identified for mocking - assert factory.should_mock_node(NodeType.LLM) - assert factory.should_mock_node(NodeType.AGENT) - assert factory.should_mock_node(NodeType.TOOL) - assert factory.should_mock_node(NodeType.KNOWLEDGE_RETRIEVAL) - assert factory.should_mock_node(NodeType.HTTP_REQUEST) - assert factory.should_mock_node(NodeType.PARAMETER_EXTRACTOR) - assert factory.should_mock_node(NodeType.DOCUMENT_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.LLM) + assert factory.should_mock_node(BuiltinNodeTypes.AGENT) + assert factory.should_mock_node(BuiltinNodeTypes.TOOL) + assert factory.should_mock_node(BuiltinNodeTypes.KNOWLEDGE_RETRIEVAL) + assert factory.should_mock_node(BuiltinNodeTypes.HTTP_REQUEST) + assert factory.should_mock_node(BuiltinNodeTypes.PARAMETER_EXTRACTOR) + assert factory.should_mock_node(BuiltinNodeTypes.DOCUMENT_EXTRACTOR) # Test that CODE and TEMPLATE_TRANSFORM are mocked (they require SSRF proxy) - assert factory.should_mock_node(NodeType.CODE) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.CODE) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Test that non-service nodes are not mocked - assert not factory.should_mock_node(NodeType.START) - assert not factory.should_mock_node(NodeType.END) - assert not factory.should_mock_node(NodeType.IF_ELSE) - assert not factory.should_mock_node(NodeType.VARIABLE_AGGREGATOR) + assert not factory.should_mock_node(BuiltinNodeTypes.START) + assert not factory.should_mock_node(BuiltinNodeTypes.END) + assert not factory.should_mock_node(BuiltinNodeTypes.IF_ELSE) + assert not factory.should_mock_node(BuiltinNodeTypes.VARIABLE_AGGREGATOR) print("✓ MockNodeFactory detection test passed") def test_mock_factory_registration(): """Test registering and unregistering mock node types.""" - from core.app.entities.app_invoke_entities import InvokeFrom - from core.workflow.entities import GraphInitParams - from core.workflow.runtime import GraphRuntimeState, VariablePool - from models.enums import UserFrom + from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom + from dify_graph.entities import GraphInitParams + from dify_graph.runtime import GraphRuntimeState, VariablePool print("Testing MockNodeFactory registration...") graph_init_params = GraphInitParams( - tenant_id="test", - app_id="test", workflow_id="test", graph_config={}, - user_id="test", - user_from=UserFrom.ACCOUNT, - invoke_from=InvokeFrom.SERVICE_API, + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test", + "app_id": "test", + "user_id": "test", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.SERVICE_API, + } + }, call_depth=0, ) graph_runtime_state = GraphRuntimeState( @@ -184,18 +186,18 @@ def test_mock_factory_registration(): ) # TEMPLATE_TRANSFORM is mocked by default (requires SSRF proxy) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Unregister mock - factory.unregister_mock_node_type(NodeType.TEMPLATE_TRANSFORM) - assert not factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.unregister_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM) + assert not factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) # Register custom mock (using a dummy class for testing) class DummyMockNode: pass - factory.register_mock_node_type(NodeType.TEMPLATE_TRANSFORM, DummyMockNode) - assert factory.should_mock_node(NodeType.TEMPLATE_TRANSFORM) + factory.register_mock_node_type(BuiltinNodeTypes.TEMPLATE_TRANSFORM, DummyMockNode) + assert factory.should_mock_node(BuiltinNodeTypes.TEMPLATE_TRANSFORM) print("✓ MockNodeFactory registration test passed") diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_join_resume.py b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_join_resume.py index a6aab81f6c..e681b39cc7 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_join_resume.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_join_resume.py @@ -4,34 +4,34 @@ from dataclasses import dataclass from datetime import datetime, timedelta from typing import Any, Protocol -from core.workflow.entities import GraphInitParams -from core.workflow.entities.workflow_start_reason import WorkflowStartReason -from core.workflow.graph import Graph -from core.workflow.graph_engine.command_channels.in_memory_channel import InMemoryChannel -from core.workflow.graph_engine.config import GraphEngineConfig -from core.workflow.graph_engine.graph_engine import GraphEngine -from core.workflow.graph_events import ( +from dify_graph.entities.workflow_start_reason import WorkflowStartReason +from dify_graph.graph import Graph +from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel +from dify_graph.graph_engine.config import GraphEngineConfig +from dify_graph.graph_engine.graph_engine import GraphEngine +from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunSucceededEvent, ) -from core.workflow.nodes.base.entities import OutputVariableEntity -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.enums import HumanInputFormStatus -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import ( +from dify_graph.nodes.base.entities import OutputVariableEntity +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.enums import HumanInputFormStatus +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import ( FormCreateParams, HumanInputFormEntity, HumanInputFormRepository, ) -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params class PauseStateStore(Protocol): @@ -126,11 +126,11 @@ def _build_runtime_state() -> GraphRuntimeState: def _build_graph(runtime_state: GraphRuntimeState, repo: HumanInputFormRepository) -> Graph: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_pause_missing_finish.py b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_pause_missing_finish.py index 62aa56fc57..60167c0441 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_pause_missing_finish.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_human_input_pause_missing_finish.py @@ -4,41 +4,41 @@ from dataclasses import dataclass from datetime import datetime, timedelta from typing import Any -from core.model_runtime.entities.llm_entities import LLMMode -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.entities.workflow_start_reason import WorkflowStartReason -from core.workflow.graph import Graph -from core.workflow.graph_engine.command_channels.in_memory_channel import InMemoryChannel -from core.workflow.graph_engine.config import GraphEngineConfig -from core.workflow.graph_engine.graph_engine import GraphEngine -from core.workflow.graph_events import ( +from dify_graph.entities.workflow_start_reason import WorkflowStartReason +from dify_graph.graph import Graph +from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel +from dify_graph.graph_engine.config import GraphEngineConfig +from dify_graph.graph_engine.graph_engine import GraphEngine +from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, NodeRunPauseRequestedEvent, NodeRunStartedEvent, NodeRunSucceededEvent, ) -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.enums import HumanInputFormStatus -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.llm.entities import ( +from dify_graph.model_runtime.entities.llm_entities import LLMMode +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.enums import HumanInputFormStatus +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import ( +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import ( FormCreateParams, HumanInputFormEntity, HumanInputFormRepository, ) -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig, NodeMockConfig from .test_mock_nodes import MockLLMNode @@ -129,11 +129,11 @@ def _build_runtime_state() -> GraphRuntimeState: def _build_graph(runtime_state: GraphRuntimeState, repo: HumanInputFormRepository, mock_config: MockConfig) -> Graph: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py index a93d03c87e..b954a4faac 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_parallel_streaming_workflow.py @@ -12,25 +12,24 @@ import time from unittest.mock import MagicMock, patch from uuid import uuid4 -from core.app.entities.app_invoke_entities import InvokeFrom -from core.app.workflow.node_factory import DifyNodeFactory +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.model_manager import ModelInstance -from core.workflow.entities import GraphInitParams -from core.workflow.enums import NodeType, WorkflowNodeExecutionStatus -from core.workflow.graph import Graph -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_events import ( +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.graph import Graph +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_events import ( GraphRunSucceededEvent, NodeRunStartedEvent, NodeRunStreamChunkEvent, NodeRunSucceededEvent, ) -from core.workflow.node_events import NodeRunResult, StreamCompletedEvent -from core.workflow.nodes.llm.node import LLMNode -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from models.enums import UserFrom +from dify_graph.node_events import NodeRunResult, StreamCompletedEvent +from dify_graph.nodes.llm.node import LLMNode +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from tests.workflow_test_utils import build_test_graph_init_params from .test_table_runner import TableTestRunner @@ -87,11 +86,11 @@ def test_parallel_streaming_workflow(): graph_config = workflow_config.get("graph", {}) # Create graph initialization parameters - init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", + init_params = build_test_graph_init_params( workflow_id="test_workflow", graph_config=graph_config, + tenant_id="test_tenant", + app_id="test_app", user_id="test_user", user_from=UserFrom.ACCOUNT, invoke_from=InvokeFrom.WEB_APP, @@ -100,8 +99,8 @@ def test_parallel_streaming_workflow(): # Create variable pool with system variables system_variables = SystemVariable( - user_id=init_params.user_id, - app_id=init_params.app_id, + user_id="test_user", + app_id="test_app", workflow_id=init_params.workflow_id, files=[], query="Tell me about yourself", # User query @@ -119,7 +118,11 @@ def test_parallel_streaming_workflow(): with patch.object( DifyNodeFactory, "_build_model_instance_for_llm_node", return_value=MagicMock(spec=ModelInstance), autospec=True ): - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) # Create the graph engine engine = GraphEngine( @@ -165,7 +168,9 @@ def test_parallel_streaming_workflow(): stream_chunk_events = [e for e in events if isinstance(e, NodeRunStreamChunkEvent)] # Get Answer node start event - answer_start_events = [e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == NodeType.ANSWER] + answer_start_events = [ + e for e in events if isinstance(e, NodeRunStartedEvent) and e.node_type == BuiltinNodeTypes.ANSWER + ] assert len(answer_start_events) == 1, f"Expected 1 Answer node start event, got {len(answer_start_events)}" answer_start_event = answer_start_events[0] @@ -212,7 +217,9 @@ def test_parallel_streaming_workflow(): # Get LLM completion events llm_completed_events = [ - (i, e) for i, e in enumerate(events) if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.LLM + (i, e) + for i, e in enumerate(events) + if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.LLM ] # Check LLM completion order - in the current implementation, LLMs run sequentially @@ -264,7 +271,7 @@ def test_parallel_streaming_workflow(): # According to Answer node configuration: '{{#1754339725656.text#}}{{#1754339718571.text#}}' # This means LLM 2 output should come first, then LLM 1 output answer_complete_events = [ - e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == NodeType.ANSWER + e for e in events if isinstance(e, NodeRunSucceededEvent) and e.node_type == BuiltinNodeTypes.ANSWER ] assert len(answer_complete_events) == 1, f"Expected 1 Answer completion event, got {len(answer_complete_events)}" diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_pause_deferred_ready_nodes.py b/api/tests/unit_tests/core/workflow/graph_engine/test_pause_deferred_ready_nodes.py index 156cfefcd6..7328ce443f 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_pause_deferred_ready_nodes.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_pause_deferred_ready_nodes.py @@ -4,42 +4,42 @@ from dataclasses import dataclass from datetime import datetime, timedelta from typing import Any -from core.model_runtime.entities.llm_entities import LLMMode -from core.model_runtime.entities.message_entities import PromptMessageRole -from core.workflow.entities import GraphInitParams -from core.workflow.entities.workflow_start_reason import WorkflowStartReason -from core.workflow.graph import Graph -from core.workflow.graph_engine.command_channels.in_memory_channel import InMemoryChannel -from core.workflow.graph_engine.config import GraphEngineConfig -from core.workflow.graph_engine.graph_engine import GraphEngine -from core.workflow.graph_events import ( +from dify_graph.entities.workflow_start_reason import WorkflowStartReason +from dify_graph.graph import Graph +from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel +from dify_graph.graph_engine.config import GraphEngineConfig +from dify_graph.graph_engine.graph_engine import GraphEngine +from dify_graph.graph_events import ( GraphRunPausedEvent, GraphRunStartedEvent, NodeRunStartedEvent, NodeRunSucceededEvent, ) -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.enums import HumanInputFormStatus -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.llm.entities import ( +from dify_graph.model_runtime.entities.llm_entities import LLMMode +from dify_graph.model_runtime.entities.message_entities import PromptMessageRole +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.enums import HumanInputFormStatus +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.llm.entities import ( ContextConfig, LLMNodeChatModelMessage, LLMNodeData, ModelConfig, VisionConfig, ) -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import ( +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import ( FormCreateParams, HumanInputFormEntity, HumanInputFormRepository, ) -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params from .test_mock_config import MockConfig, NodeMockConfig from .test_mock_nodes import MockLLMNode @@ -121,11 +121,11 @@ def _build_runtime_state() -> GraphRuntimeState: def _build_graph(runtime_state: GraphRuntimeState, repo: HumanInputFormRepository, mock_config: MockConfig) -> Graph: graph_config: dict[str, object] = {"nodes": [], "edges": []} - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="debugger", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_pause_resume_state.py b/api/tests/unit_tests/core/workflow/graph_engine/test_pause_resume_state.py index 700b3f4b8b..15a7de3c52 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_pause_resume_state.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_pause_resume_state.py @@ -3,33 +3,33 @@ import time from typing import Any from unittest.mock import MagicMock -from core.workflow.entities import GraphInitParams -from core.workflow.entities.workflow_start_reason import WorkflowStartReason -from core.workflow.graph import Graph -from core.workflow.graph_engine.command_channels.in_memory_channel import InMemoryChannel -from core.workflow.graph_engine.graph_engine import GraphEngine -from core.workflow.graph_events import ( +from dify_graph.entities.workflow_start_reason import WorkflowStartReason +from dify_graph.graph import Graph +from dify_graph.graph_engine.command_channels.in_memory_channel import InMemoryChannel +from dify_graph.graph_engine.graph_engine import GraphEngine +from dify_graph.graph_events import ( GraphEngineEvent, GraphRunPausedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, NodeRunSucceededEvent, ) -from core.workflow.graph_events.graph import GraphRunStartedEvent -from core.workflow.nodes.base.entities import OutputVariableEntity -from core.workflow.nodes.end.end_node import EndNode -from core.workflow.nodes.end.entities import EndNodeData -from core.workflow.nodes.human_input.entities import HumanInputNodeData, UserAction -from core.workflow.nodes.human_input.human_input_node import HumanInputNode -from core.workflow.nodes.start.entities import StartNodeData -from core.workflow.nodes.start.start_node import StartNode -from core.workflow.repositories.human_input_form_repository import ( +from dify_graph.graph_events.graph import GraphRunStartedEvent +from dify_graph.nodes.base.entities import OutputVariableEntity +from dify_graph.nodes.end.end_node import EndNode +from dify_graph.nodes.end.entities import EndNodeData +from dify_graph.nodes.human_input.entities import HumanInputNodeData, UserAction +from dify_graph.nodes.human_input.human_input_node import HumanInputNode +from dify_graph.nodes.start.entities import StartNodeData +from dify_graph.nodes.start.start_node import StartNode +from dify_graph.repositories.human_input_form_repository import ( HumanInputFormEntity, HumanInputFormRepository, ) -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from libs.datetime_utils import naive_utc_now +from tests.workflow_test_utils import build_test_graph_init_params def _build_runtime_state() -> GraphRuntimeState: @@ -79,11 +79,11 @@ def _build_human_input_graph( form_repository: HumanInputFormRepository, ) -> Graph: graph_config: dict[str, object] = {"nodes": [], "edges": []} - params = GraphInitParams( - tenant_id="tenant", - app_id="app", + params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from="account", invoke_from="service-api", diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_redis_stop_integration.py b/api/tests/unit_tests/core/workflow/graph_engine/test_redis_stop_integration.py index 0920940e51..9c84f42db6 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_redis_stop_integration.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_redis_stop_integration.py @@ -12,9 +12,9 @@ import pytest import redis from core.app.apps.base_app_queue_manager import AppQueueManager -from core.workflow.graph_engine.command_channels.redis_channel import RedisChannel -from core.workflow.graph_engine.entities.commands import AbortCommand, CommandType, PauseCommand -from core.workflow.graph_engine.manager import GraphEngineManager +from dify_graph.graph_engine.command_channels.redis_channel import RedisChannel +from dify_graph.graph_engine.entities.commands import AbortCommand, CommandType, PauseCommand +from dify_graph.graph_engine.manager import GraphEngineManager class TestRedisStopIntegration: diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py new file mode 100644 index 0000000000..cd9d56f683 --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_response_session.py @@ -0,0 +1,55 @@ +"""Unit tests for response session creation.""" + +from __future__ import annotations + +import pytest + +from dify_graph.enums import BuiltinNodeTypes, NodeExecutionType, NodeState, NodeType +from dify_graph.graph_engine.response_coordinator.session import ResponseSession +from dify_graph.nodes.base.template import Template, TextSegment + + +class DummyResponseNode: + """Minimal response-capable node for session tests.""" + + def __init__(self, *, node_id: str, node_type: NodeType, template: Template) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + self._template = template + + def get_streaming_template(self) -> Template: + return self._template + + +class DummyNodeWithoutStreamingTemplate: + """Minimal node that violates the response-session contract.""" + + def __init__(self, *, node_id: str, node_type: NodeType) -> None: + self.id = node_id + self.node_type = node_type + self.execution_type = NodeExecutionType.RESPONSE + self.state = NodeState.UNKNOWN + + +def test_response_session_from_node_accepts_nodes_outside_previous_allowlist() -> None: + """Session creation depends on the streaming-template contract rather than node type.""" + node = DummyResponseNode( + node_id="llm-node", + node_type=BuiltinNodeTypes.LLM, + template=Template(segments=[TextSegment(text="hello")]), + ) + + session = ResponseSession.from_node(node) + + assert session.node_id == "llm-node" + assert session.template.segments == [TextSegment(text="hello")] + + +def test_response_session_from_node_requires_streaming_template_method() -> None: + """Allowed node types still need to implement the streaming-template contract.""" + node = DummyNodeWithoutStreamingTemplate(node_id="answer-node", node_type=BuiltinNodeTypes.ANSWER) + + with pytest.raises(TypeError, match="get_streaming_template"): + ResponseSession.from_node(node) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_streaming_conversation_variables.py b/api/tests/unit_tests/core/workflow/graph_engine/test_streaming_conversation_variables.py index 99157a7c3e..4f1741d4fb 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_streaming_conversation_variables.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_streaming_conversation_variables.py @@ -1,4 +1,4 @@ -from core.workflow.graph_events import ( +from dify_graph.graph_events import ( GraphRunStartedEvent, GraphRunSucceededEvent, NodeRunStartedEvent, diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py index 5cbb7cf36e..ab8fb346b8 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_table_runner.py @@ -12,27 +12,29 @@ This module provides a robust table-driven testing framework with support for: import logging import time -from collections.abc import Callable, Sequence +from collections.abc import Callable, Mapping, Sequence from concurrent.futures import ThreadPoolExecutor, as_completed from dataclasses import dataclass, field from functools import lru_cache from pathlib import Path -from typing import Any +from typing import Any, cast -from core.app.workflow.node_factory import DifyNodeFactory +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.tools.utils.yaml_utils import _load_yaml_file -from core.workflow.entities.graph_init_params import GraphInitParams -from core.workflow.graph import Graph -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_events import ( +from core.workflow.node_factory import DifyNodeFactory, get_default_root_node_id +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY, GraphInitParams +from dify_graph.graph import Graph +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_engine.layers.base import GraphEngineLayer +from dify_graph.graph_events import ( GraphEngineEvent, GraphRunStartedEvent, GraphRunSucceededEvent, ) -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from core.workflow.variables import ( +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from dify_graph.variables import ( ArrayNumberVariable, ArrayObjectVariable, ArrayStringVariable, @@ -48,6 +50,47 @@ from .test_mock_factory import MockNodeFactory logger = logging.getLogger(__name__) +class _TableTestChildEngineBuilder: + def __init__(self, *, use_mock_factory: bool, mock_config: MockConfig | None) -> None: + self._use_mock_factory = use_mock_factory + self._mock_config = mock_config + + def build_child_engine( + self, + *, + workflow_id: str, + graph_init_params: GraphInitParams, + graph_runtime_state: GraphRuntimeState, + graph_config: Mapping[str, Any], + root_node_id: str, + layers: Sequence[object] = (), + ) -> GraphEngine: + if self._use_mock_factory: + node_factory = MockNodeFactory( + graph_init_params=graph_init_params, + graph_runtime_state=graph_runtime_state, + mock_config=self._mock_config, + ) + else: + node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state) + + child_graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id=root_node_id) + if not child_graph: + raise ValueError("child graph not found") + + child_engine = GraphEngine( + workflow_id=workflow_id, + graph=child_graph, + graph_runtime_state=graph_runtime_state, + command_channel=InMemoryChannel(), + config=GraphEngineConfig(), + child_engine_builder=self, + ) + for layer in layers: + child_engine.layer(cast(GraphEngineLayer, layer)) + return child_engine + + @dataclass class WorkflowTestCase: """Represents a single test case for table-driven testing.""" @@ -149,19 +192,23 @@ class WorkflowRunner: raise ValueError("Fixture missing workflow.graph configuration") graph_init_params = GraphInitParams( - tenant_id="test_tenant", - app_id="test_app", workflow_id="test_workflow", graph_config=graph_config, - user_id="test_user", - user_from="account", - invoke_from="debugger", # Set to debugger to avoid conversation_id requirement + run_context={ + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "test_tenant", + "app_id": "test_app", + "user_id": "test_user", + "user_from": UserFrom.ACCOUNT, + "invoke_from": InvokeFrom.DEBUGGER, # Set to debugger to avoid conversation_id requirement + } + }, call_depth=0, ) system_variables = SystemVariable( - user_id=graph_init_params.user_id, - app_id=graph_init_params.app_id, + user_id="test_user", + app_id="test_app", workflow_id=graph_init_params.workflow_id, files=[], query=query, @@ -210,7 +257,11 @@ class WorkflowRunner: else: node_factory = DifyNodeFactory(graph_init_params=graph_init_params, graph_runtime_state=graph_runtime_state) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init( + graph_config=graph_config, + node_factory=node_factory, + root_node_id=get_default_root_node_id(graph_config), + ) return graph, graph_runtime_state @@ -315,6 +366,10 @@ class TableTestRunner: scale_up_threshold=self.graph_engine_scale_up_threshold, scale_down_idle_time=self.graph_engine_scale_down_idle_time, ), + child_engine_builder=_TableTestChildEngineBuilder( + use_mock_factory=test_case.use_auto_mock, + mock_config=test_case.mock_config, + ), ) # Execute and collect events diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_tool_in_chatflow.py b/api/tests/unit_tests/core/workflow/graph_engine/test_tool_in_chatflow.py index bfcc6e1a5f..7f26bc11a7 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_tool_in_chatflow.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_tool_in_chatflow.py @@ -1,6 +1,6 @@ -from core.workflow.graph_engine import GraphEngine, GraphEngineConfig -from core.workflow.graph_engine.command_channels import InMemoryChannel -from core.workflow.graph_events import ( +from dify_graph.graph_engine import GraphEngine, GraphEngineConfig +from dify_graph.graph_engine.command_channels import InMemoryChannel +from dify_graph.graph_events import ( GraphRunSucceededEvent, NodeRunStreamChunkEvent, ) diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_variable_aggregator.py b/api/tests/unit_tests/core/workflow/graph_engine/test_variable_aggregator.py index 221e1291d1..f63e8ff4ce 100644 --- a/api/tests/unit_tests/core/workflow/graph_engine/test_variable_aggregator.py +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_variable_aggregator.py @@ -2,9 +2,9 @@ from unittest.mock import patch import pytest -from core.workflow.enums import WorkflowNodeExecutionStatus -from core.workflow.node_events import NodeRunResult -from core.workflow.nodes.template_transform.template_transform_node import TemplateTransformNode +from dify_graph.enums import WorkflowNodeExecutionStatus +from dify_graph.node_events import NodeRunResult +from dify_graph.nodes.template_transform.template_transform_node import TemplateTransformNode from .test_table_runner import TableTestRunner, WorkflowTestCase diff --git a/api/tests/unit_tests/core/workflow/graph_engine/test_worker.py b/api/tests/unit_tests/core/workflow/graph_engine/test_worker.py new file mode 100644 index 0000000000..bc00b49fba --- /dev/null +++ b/api/tests/unit_tests/core/workflow/graph_engine/test_worker.py @@ -0,0 +1,145 @@ +import queue +from collections.abc import Generator +from datetime import UTC, datetime, timedelta +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from dify_graph.enums import BuiltinNodeTypes, WorkflowNodeExecutionStatus +from dify_graph.graph_engine.ready_queue import InMemoryReadyQueue +from dify_graph.graph_engine.worker import Worker +from dify_graph.graph_events import NodeRunFailedEvent, NodeRunStartedEvent + + +def test_build_fallback_failure_event_uses_naive_utc_and_failed_node_run_result(mocker) -> None: + fixed_time = datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC).replace(tzinfo=None) + mocker.patch("dify_graph.graph_engine.worker.naive_utc_now", return_value=fixed_time) + + worker = Worker( + ready_queue=InMemoryReadyQueue(), + event_queue=queue.Queue(), + graph=MagicMock(), + layers=[], + ) + node = SimpleNamespace( + execution_id="exec-1", + id="node-1", + node_type=BuiltinNodeTypes.LLM, + ) + + event = worker._build_fallback_failure_event(node, RuntimeError("boom")) + + assert event.start_at == fixed_time + assert event.finished_at == fixed_time + assert event.error == "boom" + assert event.node_run_result.status == WorkflowNodeExecutionStatus.FAILED + assert event.node_run_result.error == "boom" + assert event.node_run_result.error_type == "RuntimeError" + + +def test_worker_fallback_failure_event_reuses_observed_start_time() -> None: + start_at = datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC).replace(tzinfo=None) + failure_time = start_at + timedelta(seconds=5) + captured_events: list[NodeRunFailedEvent | NodeRunStartedEvent] = [] + + class FakeNode: + execution_id = "exec-1" + id = "node-1" + node_type = BuiltinNodeTypes.LLM + + def ensure_execution_id(self) -> str: + return self.execution_id + + def run(self) -> Generator[NodeRunStartedEvent, None, None]: + yield NodeRunStartedEvent( + id=self.execution_id, + node_id=self.id, + node_type=self.node_type, + node_title="LLM", + start_at=start_at, + ) + + worker = Worker( + ready_queue=MagicMock(), + event_queue=MagicMock(), + graph=MagicMock(nodes={"node-1": FakeNode()}), + layers=[], + ) + + worker._ready_queue.get.side_effect = ["node-1"] + + def put_side_effect(event: NodeRunFailedEvent | NodeRunStartedEvent) -> None: + captured_events.append(event) + if len(captured_events) == 1: + raise RuntimeError("queue boom") + worker.stop() + + worker._event_queue.put.side_effect = put_side_effect + + with patch("dify_graph.graph_engine.worker.naive_utc_now", return_value=failure_time): + worker.run() + + fallback_event = captured_events[-1] + + assert isinstance(fallback_event, NodeRunFailedEvent) + assert fallback_event.start_at == start_at + assert fallback_event.finished_at == failure_time + assert fallback_event.error == "queue boom" + assert fallback_event.node_run_result.status == WorkflowNodeExecutionStatus.FAILED + + +def test_worker_fallback_failure_event_ignores_nested_iteration_child_start_times() -> None: + parent_start = datetime(2024, 1, 1, 12, 0, 0, tzinfo=UTC).replace(tzinfo=None) + child_start = parent_start + timedelta(seconds=3) + failure_time = parent_start + timedelta(seconds=5) + captured_events: list[NodeRunFailedEvent | NodeRunStartedEvent] = [] + + class FakeIterationNode: + execution_id = "iteration-exec" + id = "iteration-node" + node_type = BuiltinNodeTypes.ITERATION + + def ensure_execution_id(self) -> str: + return self.execution_id + + def run(self) -> Generator[NodeRunStartedEvent, None, None]: + yield NodeRunStartedEvent( + id=self.execution_id, + node_id=self.id, + node_type=self.node_type, + node_title="Iteration", + start_at=parent_start, + ) + yield NodeRunStartedEvent( + id="child-exec", + node_id="child-node", + node_type=BuiltinNodeTypes.LLM, + node_title="LLM", + start_at=child_start, + in_iteration_id=self.id, + ) + + worker = Worker( + ready_queue=MagicMock(), + event_queue=MagicMock(), + graph=MagicMock(nodes={"iteration-node": FakeIterationNode()}), + layers=[], + ) + + worker._ready_queue.get.side_effect = ["iteration-node"] + + def put_side_effect(event: NodeRunFailedEvent | NodeRunStartedEvent) -> None: + captured_events.append(event) + if len(captured_events) == 2: + raise RuntimeError("queue boom") + worker.stop() + + worker._event_queue.put.side_effect = put_side_effect + + with patch("dify_graph.graph_engine.worker.naive_utc_now", return_value=failure_time): + worker.run() + + fallback_event = captured_events[-1] + + assert isinstance(fallback_event, NodeRunFailedEvent) + assert fallback_event.start_at == parent_start + assert fallback_event.finished_at == failure_time diff --git a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py index 1e95ec1970..fd563d1be2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py +++ b/api/tests/unit_tests/core/workflow/nodes/answer/test_answer.py @@ -2,16 +2,15 @@ import time import uuid from unittest.mock import MagicMock -from core.app.entities.app_invoke_entities import InvokeFrom -from core.app.workflow.node_factory import DifyNodeFactory -from core.workflow.entities import GraphInitParams -from core.workflow.enums import WorkflowNodeExecutionStatus -from core.workflow.graph import Graph -from core.workflow.nodes.answer.answer_node import AnswerNode -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom +from core.workflow.node_factory import DifyNodeFactory +from dify_graph.enums import WorkflowNodeExecutionStatus +from dify_graph.graph import Graph +from dify_graph.nodes.answer.answer_node import AnswerNode +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable from extensions.ext_database import db -from models.enums import UserFrom +from tests.workflow_test_utils import build_test_graph_init_params def test_execute_answer(): @@ -36,11 +35,11 @@ def test_execute_answer(): ], } - init_params = GraphInitParams( - tenant_id="1", - app_id="1", + init_params = build_test_graph_init_params( workflow_id="1", graph_config=graph_config, + tenant_id="1", + app_id="1", user_id="1", user_from=UserFrom.ACCOUNT, invoke_from=InvokeFrom.DEBUGGER, @@ -65,7 +64,7 @@ def test_execute_answer(): graph_runtime_state=graph_runtime_state, ) - graph = Graph.init(graph_config=graph_config, node_factory=node_factory) + graph = Graph.init(graph_config=graph_config, node_factory=node_factory, root_node_id="start") node_config = { "id": "answer", diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py index 21a642c2f8..81d3f5be9c 100644 --- a/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/base/test_base_node.py @@ -1,14 +1,12 @@ import pytest -from core.workflow.enums import NodeType -from core.workflow.nodes.base.entities import BaseNodeData -from core.workflow.nodes.base.node import Node +from core.workflow.node_factory import get_node_type_classes_mapping +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base.node import Node -# Ensures that all node classes are imported. -from core.workflow.nodes.node_mapping import NODE_TYPE_CLASSES_MAPPING - -# Ensure `NODE_TYPE_CLASSES_MAPPING` is used and not automatically removed. -_ = NODE_TYPE_CLASSES_MAPPING +# Ensures that all production node classes are imported and registered. +_ = get_node_type_classes_mapping() class _TestNodeData(BaseNodeData): @@ -43,7 +41,7 @@ def test_ensure_subclasses_of_base_node_has_node_type_and_version_method_defined node_type = cls.node_type node_version = cls.version() - assert isinstance(cls.node_type, NodeType) + assert isinstance(cls.node_type, str) assert isinstance(node_version, str) node_type_and_version = (node_type, node_version) assert node_type_and_version not in type_version_set, ( @@ -56,7 +54,7 @@ def test_extract_node_data_type_from_generic_extracts_type(): """When a class inherits from Node[T], it should extract T.""" class _ConcreteNode(Node[_TestNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: @@ -108,7 +106,7 @@ def test_init_subclass_rejects_explicit_node_data_type_without_generic(): class _ExplicitNode(Node): _node_data_type = _TestNodeData - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: @@ -119,10 +117,27 @@ def test_init_subclass_sets_node_data_type_from_generic(): """Verify that __init_subclass__ sets _node_data_type from the generic parameter.""" class _AutoNode(Node[_TestNodeData]): - node_type = NodeType.CODE + node_type = BuiltinNodeTypes.CODE @staticmethod def version() -> str: return "1" assert _AutoNode._node_data_type is _TestNodeData + + +def test_validate_node_data_uses_declared_node_data_type(): + """Public validation should hydrate the subclass-declared node data model.""" + + class _AutoNode(Node[_TestNodeData]): + node_type = BuiltinNodeTypes.CODE + + @staticmethod + def version() -> str: + return "1" + + base_node_data = BaseNodeData.model_validate({"type": BuiltinNodeTypes.CODE, "title": "Test"}) + + validated = _AutoNode.validate_node_data(base_node_data) + + assert isinstance(validated, _TestNodeData) diff --git a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py index 45d222b98c..972a945ca0 100644 --- a/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py +++ b/api/tests/unit_tests/core/workflow/nodes/base/test_get_node_type_classes_mapping.py @@ -1,26 +1,27 @@ import types from collections.abc import Mapping -from core.workflow.enums import NodeType -from core.workflow.nodes.base.entities import BaseNodeData -from core.workflow.nodes.base.node import Node +from core.workflow.node_factory import get_node_type_classes_mapping +from dify_graph.entities.base_node_data import BaseNodeData +from dify_graph.enums import BuiltinNodeTypes, NodeType +from dify_graph.nodes.base.node import Node # Import concrete nodes we will assert on (numeric version path) -from core.workflow.nodes.variable_assigner.v1.node import ( +from dify_graph.nodes.variable_assigner.v1.node import ( VariableAssignerNode as VariableAssignerV1, ) -from core.workflow.nodes.variable_assigner.v2.node import ( +from dify_graph.nodes.variable_assigner.v2.node import ( VariableAssignerNode as VariableAssignerV2, ) def test_variable_assigner_latest_prefers_highest_numeric_version(): # Act - mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() + mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping() # Assert basic presence - assert NodeType.VARIABLE_ASSIGNER in mapping - va_versions = mapping[NodeType.VARIABLE_ASSIGNER] + assert BuiltinNodeTypes.VARIABLE_ASSIGNER in mapping + va_versions = mapping[BuiltinNodeTypes.VARIABLE_ASSIGNER] # Both concrete versions must be present assert va_versions.get("1") is VariableAssignerV1 @@ -34,7 +35,7 @@ def test_latest_prefers_highest_numeric_version(): # Arrange: define two ephemeral subclasses with numeric versions under a NodeType # that has no concrete implementations in production to avoid interference. class _Version1(Node[BaseNodeData]): # type: ignore[misc] - node_type = NodeType.LEGACY_VARIABLE_AGGREGATOR + node_type = BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR def init_node_data(self, data): pass @@ -73,11 +74,11 @@ def test_latest_prefers_highest_numeric_version(): return "version2" # Act: build a fresh mapping (it should now see our ephemeral subclasses) - mapping: Mapping[NodeType, Mapping[str, type[Node]]] = Node.get_node_type_classes_mapping() + mapping: Mapping[NodeType, Mapping[str, type[Node]]] = get_node_type_classes_mapping() # Assert: both numeric versions exist for this NodeType; 'latest' points to the higher numeric version - assert NodeType.LEGACY_VARIABLE_AGGREGATOR in mapping - legacy_versions = mapping[NodeType.LEGACY_VARIABLE_AGGREGATOR] + assert BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR in mapping + legacy_versions = mapping[BuiltinNodeTypes.LEGACY_VARIABLE_AGGREGATOR] assert legacy_versions.get("1") is _Version1 assert legacy_versions.get("2") is _Version2 diff --git a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py index 00c8cb3779..784e08edd2 100644 --- a/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/code/code_node_spec.py @@ -1,13 +1,13 @@ from configs import dify_config -from core.workflow.nodes.code.code_node import CodeNode -from core.workflow.nodes.code.entities import CodeLanguage, CodeNodeData -from core.workflow.nodes.code.exc import ( +from dify_graph.nodes.code.code_node import CodeNode +from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData +from dify_graph.nodes.code.exc import ( CodeNodeError, DepthLimitError, OutputValidationError, ) -from core.workflow.nodes.code.limits import CodeNodeLimits -from core.workflow.variables.types import SegmentType +from dify_graph.nodes.code.limits import CodeNodeLimits +from dify_graph.variables.types import SegmentType CodeNode._limits = CodeNodeLimits( max_string_length=dify_config.CODE_MAX_STRING_LENGTH, @@ -272,7 +272,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_1", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert result == {} @@ -292,7 +292,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_1", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert "node_1.input_text" in result @@ -315,7 +315,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="code_node", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert len(result) == 3 @@ -338,7 +338,7 @@ class TestCodeNodeExtractVariableSelector: result = CodeNode._extract_variable_selector_to_variable_mapping( graph_config={}, node_id="node_x", - node_data=node_data, + node_data=CodeNodeData.model_validate(node_data, from_attributes=True), ) assert result["node_x.deep_var"] == ["node", "obj", "nested", "value"] @@ -437,7 +437,7 @@ class TestCodeNodeInitialization: "outputs": {"x": {"type": "number"}}, } - node._node_data = node._hydrate_node_data(data) + node._node_data = CodeNode._node_data_type.model_validate(data, from_attributes=True) assert node._node_data.title == "Test Node" assert node._node_data.code_language == CodeLanguage.PYTHON3 @@ -453,7 +453,7 @@ class TestCodeNodeInitialization: "outputs": {"x": {"type": "number"}}, } - node._node_data = node._hydrate_node_data(data) + node._node_data = CodeNode._node_data_type.model_validate(data, from_attributes=True) assert node._node_data.code_language == CodeLanguage.JAVASCRIPT diff --git a/api/tests/unit_tests/core/workflow/nodes/code/entities_spec.py b/api/tests/unit_tests/core/workflow/nodes/code/entities_spec.py index 28d59c3568..de7ed0815e 100644 --- a/api/tests/unit_tests/core/workflow/nodes/code/entities_spec.py +++ b/api/tests/unit_tests/core/workflow/nodes/code/entities_spec.py @@ -1,8 +1,8 @@ import pytest from pydantic import ValidationError -from core.workflow.nodes.code.entities import CodeLanguage, CodeNodeData -from core.workflow.variables.types import SegmentType +from dify_graph.nodes.code.entities import CodeLanguage, CodeNodeData +from dify_graph.variables.types import SegmentType class TestCodeNodeDataOutput: diff --git a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py index 584ed23e91..859115ceb3 100644 --- a/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/datasource/test_datasource_node.py @@ -1,6 +1,7 @@ -from core.workflow.entities.workflow_node_execution import WorkflowNodeExecutionStatus -from core.workflow.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent from core.workflow.nodes.datasource.datasource_node import DatasourceNode +from dify_graph.entities.graph_init_params import DIFY_RUN_CONTEXT_KEY +from dify_graph.entities.workflow_node_execution import WorkflowNodeExecutionStatus +from dify_graph.node_events import NodeRunResult, StreamChunkEvent, StreamCompletedEvent class _VarSeg: @@ -28,13 +29,17 @@ class _GraphState: class _GraphParams: - tenant_id = "t1" - app_id = "app-1" workflow_id = "wf-1" graph_config = {} - user_id = "u1" - user_from = "account" - invoke_from = "debugger" + run_context = { + DIFY_RUN_CONTEXT_KEY: { + "tenant_id": "t1", + "app_id": "app-1", + "user_id": "u1", + "user_from": "account", + "invoke_from": "debugger", + } + } call_depth = 0 @@ -69,6 +74,8 @@ def test_datasource_node_delegates_to_manager_stream(mocker): def get_upload_file_by_id(cls, **_): raise AssertionError("not called") + mocker.patch("core.workflow.nodes.datasource.datasource_node.DatasourceManager", new=_Mgr) + node = DatasourceNode( id="n", config={ @@ -85,7 +92,6 @@ def test_datasource_node_delegates_to_manager_stream(mocker): }, graph_init_params=gp, graph_runtime_state=gs, - datasource_manager=_Mgr, ) evts = list(node._run()) diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_config.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_config.py index 90f4cd018b..cd822a6f89 100644 --- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_config.py +++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_config.py @@ -1,4 +1,4 @@ -from core.workflow.nodes.http_request import build_http_request_config +from dify_graph.nodes.http_request import build_http_request_config def test_build_http_request_config_uses_literal_defaults(): diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py index 47a5df92a4..fec6ad90eb 100644 --- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py +++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py @@ -4,7 +4,7 @@ from unittest.mock import Mock, PropertyMock, patch import httpx import pytest -from core.workflow.nodes.http_request.entities import Response +from dify_graph.nodes.http_request.entities import Response @pytest.fixture @@ -104,7 +104,7 @@ def test_mimetype_based_detection(mock_response, content_type, expected_main_typ mock_response.headers = {"content-type": content_type} type(mock_response).content = PropertyMock(return_value=bytes([0x00])) # Dummy content - with patch("core.workflow.nodes.http_request.entities.mimetypes.guess_type") as mock_guess_type: + with patch("dify_graph.nodes.http_request.entities.mimetypes.guess_type") as mock_guess_type: # Mock the return value based on expected_main_type if expected_main_type: mock_guess_type.return_value = (f"{expected_main_type}/subtype", None) diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py index 67da890eb2..cea7195417 100644 --- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py +++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_executor.py @@ -2,19 +2,19 @@ import pytest from configs import dify_config from core.helper.ssrf_proxy import ssrf_proxy -from core.workflow.file.file_manager import file_manager -from core.workflow.nodes.http_request import ( +from dify_graph.file.file_manager import file_manager +from dify_graph.nodes.http_request import ( BodyData, HttpRequestNodeAuthorization, HttpRequestNodeBody, HttpRequestNodeConfig, HttpRequestNodeData, ) -from core.workflow.nodes.http_request.entities import HttpRequestNodeTimeout -from core.workflow.nodes.http_request.exc import AuthorizationConfigError -from core.workflow.nodes.http_request.executor import Executor -from core.workflow.runtime import VariablePool -from core.workflow.system_variable import SystemVariable +from dify_graph.nodes.http_request.entities import HttpRequestNodeTimeout +from dify_graph.nodes.http_request.exc import AuthorizationConfigError +from dify_graph.nodes.http_request.executor import Executor +from dify_graph.runtime import VariablePool +from dify_graph.system_variable import SystemVariable HTTP_REQUEST_CONFIG = HttpRequestNodeConfig( max_connect_timeout=dify_config.HTTP_REQUEST_MAX_CONNECT_TIMEOUT, diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py index cad0466809..5e34bf1d94 100644 --- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py +++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_http_request_node.py @@ -4,17 +4,16 @@ from typing import Any import httpx import pytest -from core.app.entities.app_invoke_entities import InvokeFrom +from core.app.entities.app_invoke_entities import InvokeFrom, UserFrom from core.helper.ssrf_proxy import ssrf_proxy from core.tools.tool_file_manager import ToolFileManager -from core.workflow.entities import GraphInitParams -from core.workflow.enums import WorkflowNodeExecutionStatus -from core.workflow.file.file_manager import file_manager -from core.workflow.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNode, HttpRequestNodeConfig -from core.workflow.nodes.http_request.entities import HttpRequestNodeTimeout, Response -from core.workflow.runtime import GraphRuntimeState, VariablePool -from core.workflow.system_variable import SystemVariable -from models.enums import UserFrom +from dify_graph.enums import WorkflowNodeExecutionStatus +from dify_graph.file.file_manager import file_manager +from dify_graph.nodes.http_request import HTTP_REQUEST_CONFIG_FILTER_KEY, HttpRequestNode, HttpRequestNodeConfig +from dify_graph.nodes.http_request.entities import HttpRequestNodeTimeout, Response +from dify_graph.runtime import GraphRuntimeState, VariablePool +from dify_graph.system_variable import SystemVariable +from tests.workflow_test_utils import build_test_graph_init_params HTTP_REQUEST_CONFIG = HttpRequestNodeConfig( max_connect_timeout=10, @@ -99,11 +98,11 @@ def _build_http_node( ], "edges": [], } - graph_init_params = GraphInitParams( - tenant_id="tenant", - app_id="app", + graph_init_params = build_test_graph_init_params( workflow_id="workflow", graph_config=graph_config, + tenant_id="tenant", + app_id="app", user_id="user", user_from=UserFrom.ACCOUNT, invoke_from=InvokeFrom.DEBUGGER, @@ -162,7 +161,7 @@ def test_run_passes_node_data_ssl_verify_to_executor(monkeypatch: pytest.MonkeyP ) ) - monkeypatch.setattr("core.workflow.nodes.http_request.node.Executor", FakeExecutor) + monkeypatch.setattr("dify_graph.nodes.http_request.node.Executor", FakeExecutor) result = node._run() diff --git a/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py b/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py index ca4a887d20..d52dfa2a65 100644 --- a/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py +++ b/api/tests/unit_tests/core/workflow/nodes/human_input/test_email_delivery_config.py @@ -1,5 +1,5 @@ -from core.workflow.nodes.human_input.entities import EmailDeliveryConfig, EmailRecipients -from core.workflow.runtime import VariablePool +from dify_graph.nodes.human_input.entities import EmailDeliveryConfig, EmailRecipients +from dify_graph.runtime import VariablePool def test_render_body_template_replaces_variable_values(): @@ -14,3 +14,64 @@ def test_render_body_template_replaces_variable_values(): result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool) assert result == "Hello World https://example.com" + + +def test_render_markdown_body_renders_markdown_to_html(): + rendered = EmailDeliveryConfig.render_markdown_body("**Bold** and [link](https://example.com)") + + assert "Bold" in rendered + assert 'link' in rendered + + +def test_render_markdown_body_sanitizes_unsafe_html(): + rendered = EmailDeliveryConfig.render_markdown_body( + 'Click' + ) + + assert "' } + render(