From 9970f4449a93acd670a27cda1301c41ee17aec67 Mon Sep 17 00:00:00 2001
From: wangxiaolei <fatelei@gmail.com>
Date: Mon, 9 Mar 2026 15:53:21 +0800
Subject: [PATCH] refactor: reuse redis connection instead of create new one
 (#32678)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
---
 api/schedule/queue_monitor_task.py            |    4 +
 api/schedule/trigger_provider_refresh_task.py |   20 +-
 api/schedule/workflow_schedule_task.py        |   34 +-
 api/tasks/document_indexing_task.py           |   39 +-
 .../rag_pipeline/rag_pipeline_run_task.py     |   38 +-
 .../tasks/test_dataset_indexing_task.py       |   24 +-
 .../tasks/test_document_indexing_task.py      |   27 +-
 .../tasks/test_rag_pipeline_run_tasks.py      |   91 +-
 .../trigger/conftest.py                       |   12 +-
 .../tasks/test_dataset_indexing_task.py       | 1183 ++++++++++++++++-
 10 files changed, 1360 insertions(+), 112 deletions(-)

diff --git a/api/schedule/queue_monitor_task.py b/api/schedule/queue_monitor_task.py
index 77d6b5a138..01642e397e 100644
--- a/api/schedule/queue_monitor_task.py
+++ b/api/schedule/queue_monitor_task.py
@@ -21,6 +21,10 @@ celery_redis = Redis(
     ssl_cert_reqs=getattr(dify_config, "REDIS_SSL_CERT_REQS", None) if dify_config.BROKER_USE_SSL else None,
     ssl_certfile=getattr(dify_config, "REDIS_SSL_CERTFILE", None) if dify_config.BROKER_USE_SSL else None,
     ssl_keyfile=getattr(dify_config, "REDIS_SSL_KEYFILE", None) if dify_config.BROKER_USE_SSL else None,
+    # Add conservative socket timeouts and health checks to avoid long-lived half-open sockets
+    socket_timeout=5,
+    socket_connect_timeout=5,
+    health_check_interval=30,
 )
 
 logger = logging.getLogger(__name__)
diff --git a/api/schedule/trigger_provider_refresh_task.py b/api/schedule/trigger_provider_refresh_task.py
index 3b3e478793..df5058d70a 100644
--- a/api/schedule/trigger_provider_refresh_task.py
+++ b/api/schedule/trigger_provider_refresh_task.py
@@ -3,6 +3,7 @@ import math
 import time
 from collections.abc import Iterable, Sequence
 
+from celery import group
 from sqlalchemy import ColumnElement, and_, func, or_, select
 from sqlalchemy.engine.row import Row
 from sqlalchemy.orm import Session
@@ -85,20 +86,25 @@ def trigger_provider_refresh() -> None:
             lock_keys: list[str] = build_trigger_refresh_lock_keys(subscriptions)
             acquired: list[bool] = _acquire_locks(keys=lock_keys, ttl_seconds=lock_ttl)
 
-            enqueued: int = 0
-            for (tenant_id, subscription_id), is_locked in zip(subscriptions, acquired):
-                if not is_locked:
-                    continue
-                trigger_subscription_refresh.delay(tenant_id=tenant_id, subscription_id=subscription_id)
-                enqueued += 1
+            if not any(acquired):
+                continue
+
+            jobs = [
+                trigger_subscription_refresh.s(tenant_id=tenant_id, subscription_id=subscription_id)
+                for (tenant_id, subscription_id), is_locked in zip(subscriptions, acquired)
+                if is_locked
+            ]
+            result = group(jobs).apply_async()
+            enqueued = len(jobs)
 
             logger.info(
-                "Trigger refresh page %d/%d: scanned=%d locks_acquired=%d enqueued=%d",
+                "Trigger refresh page %d/%d: scanned=%d locks_acquired=%d enqueued=%d result=%s",
                 page + 1,
                 pages,
                 len(subscriptions),
                 sum(1 for x in acquired if x),
                 enqueued,
+                result,
             )
 
     logger.info("Trigger refresh scan done: due=%d", total_due)
diff --git a/api/schedule/workflow_schedule_task.py b/api/schedule/workflow_schedule_task.py
index d68b9565ec..2fee9e467d 100644
--- a/api/schedule/workflow_schedule_task.py
+++ b/api/schedule/workflow_schedule_task.py
@@ -1,6 +1,6 @@
 import logging
 
-from celery import group, shared_task
+from celery import current_app, group, shared_task
 from sqlalchemy import and_, select
 from sqlalchemy.orm import Session, sessionmaker
 
@@ -29,31 +29,27 @@ def poll_workflow_schedules() -> None:
     with session_factory() as session:
         total_dispatched = 0
 
-        # Process in batches until we've handled all due schedules or hit the limit
         while True:
             due_schedules = _fetch_due_schedules(session)
 
             if not due_schedules:
                 break
 
-            dispatched_count = _process_schedules(session, due_schedules)
-            total_dispatched += dispatched_count
+            with current_app.producer_or_acquire() as producer:  # type: ignore
+                dispatched_count = _process_schedules(session, due_schedules, producer)
+                total_dispatched += dispatched_count
 
-            logger.debug("Batch processed: %d dispatched", dispatched_count)
-
-            # Circuit breaker: check if we've hit the per-tick limit (if enabled)
-            if (
-                dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK > 0
-                and total_dispatched >= dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK
-            ):
-                logger.warning(
-                    "Circuit breaker activated: reached dispatch limit (%d), will continue next tick",
-                    dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK,
-                )
-                break
+                logger.debug("Batch processed: %d dispatched", dispatched_count)
 
+                # Circuit breaker: check if we've hit the per-tick limit (if enabled)
+                if 0 < dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK <= total_dispatched:
+                    logger.warning(
+                        "Circuit breaker activated: reached dispatch limit (%d), will continue next tick",
+                        dify_config.WORKFLOW_SCHEDULE_MAX_DISPATCH_PER_TICK,
+                    )
+                    break
         if total_dispatched > 0:
-            logger.info("Total processed: %d dispatched", total_dispatched)
+            logger.info("Total processed: %d workflow schedule(s) dispatched", total_dispatched)
 
 
 def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]:
@@ -90,7 +86,7 @@ def _fetch_due_schedules(session: Session) -> list[WorkflowSchedulePlan]:
     return list(due_schedules)
 
 
-def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan]) -> int:
+def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan], producer=None) -> int:
     """Process schedules: check quota, update next run time and dispatch to Celery in parallel."""
     if not schedules:
         return 0
@@ -107,7 +103,7 @@ def _process_schedules(session: Session, schedules: list[WorkflowSchedulePlan])
 
     if tasks_to_dispatch:
         job = group(run_schedule_trigger.s(schedule_id) for schedule_id in tasks_to_dispatch)
-        job.apply_async()
+        job.apply_async(producer=producer)
 
         logger.debug("Dispatched %d tasks in parallel", len(tasks_to_dispatch))
 
diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py
index 11edcf151f..b3f36d8f44 100644
--- a/api/tasks/document_indexing_task.py
+++ b/api/tasks/document_indexing_task.py
@@ -1,9 +1,10 @@
 import logging
 import time
-from collections.abc import Callable, Sequence
+from collections.abc import Sequence
+from typing import Any, Protocol
 
 import click
-from celery import shared_task
+from celery import current_app, shared_task
 
 from configs import dify_config
 from core.db.session_factory import session_factory
@@ -19,6 +20,12 @@ from tasks.generate_summary_index_task import generate_summary_index_task
 logger = logging.getLogger(__name__)
 
 
+class CeleryTaskLike(Protocol):
+    def delay(self, *args: Any, **kwargs: Any) -> Any: ...
+
+    def apply_async(self, *args: Any, **kwargs: Any) -> Any: ...
+
+
 @shared_task(queue="dataset")
 def document_indexing_task(dataset_id: str, document_ids: list):
     """
@@ -179,8 +186,8 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
 
 
 def _document_indexing_with_tenant_queue(
-    tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: Callable[[str, str, Sequence[str]], None]
-):
+    tenant_id: str, dataset_id: str, document_ids: Sequence[str], task_func: CeleryTaskLike
+) -> None:
     try:
         _document_indexing(dataset_id, document_ids)
     except Exception:
@@ -201,16 +208,20 @@ def _document_indexing_with_tenant_queue(
         logger.info("document indexing tenant isolation queue %s next tasks: %s", tenant_id, next_tasks)
 
         if next_tasks:
-            for next_task in next_tasks:
-                document_task = DocumentTask(**next_task)
-                # Process the next waiting task
-                # Keep the flag set to indicate a task is running
-                tenant_isolated_task_queue.set_task_waiting_time()
-                task_func.delay(  # type: ignore
-                    tenant_id=document_task.tenant_id,
-                    dataset_id=document_task.dataset_id,
-                    document_ids=document_task.document_ids,
-                )
+            with current_app.producer_or_acquire() as producer:  # type: ignore
+                for next_task in next_tasks:
+                    document_task = DocumentTask(**next_task)
+                    # Keep the flag set to indicate a task is running
+                    tenant_isolated_task_queue.set_task_waiting_time()
+                    task_func.apply_async(
+                        kwargs={
+                            "tenant_id": document_task.tenant_id,
+                            "dataset_id": document_task.dataset_id,
+                            "document_ids": document_task.document_ids,
+                        },
+                        producer=producer,
+                    )
+
         else:
             # No more waiting tasks, clear the flag
             tenant_isolated_task_queue.delete_task_key()
diff --git a/api/tasks/rag_pipeline/rag_pipeline_run_task.py b/api/tasks/rag_pipeline/rag_pipeline_run_task.py
index 093342d1a3..52f66dddb8 100644
--- a/api/tasks/rag_pipeline/rag_pipeline_run_task.py
+++ b/api/tasks/rag_pipeline/rag_pipeline_run_task.py
@@ -3,12 +3,13 @@ import json
 import logging
 import time
 import uuid
-from collections.abc import Mapping
+from collections.abc import Mapping, Sequence
 from concurrent.futures import ThreadPoolExecutor
+from itertools import islice
 from typing import Any
 
 import click
-from celery import shared_task  # type: ignore
+from celery import group, shared_task
 from flask import current_app, g
 from sqlalchemy.orm import Session, sessionmaker
 
@@ -27,6 +28,11 @@ from services.file_service import FileService
 logger = logging.getLogger(__name__)
 
 
+def chunked(iterable: Sequence, size: int):
+    it = iter(iterable)
+    return iter(lambda: list(islice(it, size)), [])
+
+
 @shared_task(queue="pipeline")
 def rag_pipeline_run_task(
     rag_pipeline_invoke_entities_file_id: str,
@@ -83,16 +89,24 @@ def rag_pipeline_run_task(
         logger.info("rag pipeline tenant isolation queue %s next files: %s", tenant_id, next_file_ids)
 
         if next_file_ids:
-            for next_file_id in next_file_ids:
-                # Process the next waiting task
-                # Keep the flag set to indicate a task is running
-                tenant_isolated_task_queue.set_task_waiting_time()
-                rag_pipeline_run_task.delay(  # type: ignore
-                    rag_pipeline_invoke_entities_file_id=next_file_id.decode("utf-8")
-                    if isinstance(next_file_id, bytes)
-                    else next_file_id,
-                    tenant_id=tenant_id,
-                )
+            for batch in chunked(next_file_ids, 100):
+                jobs = []
+                for next_file_id in batch:
+                    tenant_isolated_task_queue.set_task_waiting_time()
+
+                    file_id = (
+                        next_file_id.decode("utf-8") if isinstance(next_file_id, (bytes, bytearray)) else next_file_id
+                    )
+
+                    jobs.append(
+                        rag_pipeline_run_task.s(
+                            rag_pipeline_invoke_entities_file_id=file_id,
+                            tenant_id=tenant_id,
+                        )
+                    )
+
+                if jobs:
+                    group(jobs).apply_async()
         else:
             # No more waiting tasks, clear the flag
             tenant_isolated_task_queue.delete_task_key()
diff --git a/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py
index 207bdad751..4a62383590 100644
--- a/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_dataset_indexing_task.py
@@ -322,11 +322,14 @@ class TestDatasetIndexingTaskIntegration:
             _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
 
         # Assert
-        task_dispatch_spy.delay.assert_called_once_with(
-            tenant_id=next_task["tenant_id"],
-            dataset_id=next_task["dataset_id"],
-            document_ids=next_task["document_ids"],
-        )
+        # apply_async is used by implementation; assert it was called once with expected kwargs
+        assert task_dispatch_spy.apply_async.call_count == 1
+        call_kwargs = task_dispatch_spy.apply_async.call_args.kwargs.get("kwargs", {})
+        assert call_kwargs == {
+            "tenant_id": next_task["tenant_id"],
+            "dataset_id": next_task["dataset_id"],
+            "document_ids": next_task["document_ids"],
+        }
         set_waiting_spy.assert_called_once()
         delete_key_spy.assert_not_called()
 
@@ -352,7 +355,7 @@ class TestDatasetIndexingTaskIntegration:
             _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
 
         # Assert
-        task_dispatch_spy.delay.assert_not_called()
+        task_dispatch_spy.apply_async.assert_not_called()
         delete_key_spy.assert_called_once()
 
     def test_validation_failure_sets_error_status_when_vector_space_at_limit(
@@ -447,7 +450,7 @@ class TestDatasetIndexingTaskIntegration:
             _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
 
         # Assert
-        task_dispatch_spy.delay.assert_called_once()
+        task_dispatch_spy.apply_async.assert_called_once()
 
     def test_sessions_close_on_successful_indexing(
         self,
@@ -534,7 +537,7 @@ class TestDatasetIndexingTaskIntegration:
             _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
 
         # Assert
-        assert task_dispatch_spy.delay.call_count == concurrency_limit
+        assert task_dispatch_spy.apply_async.call_count == concurrency_limit
         assert set_waiting_spy.call_count == concurrency_limit
 
     def test_task_queue_fifo_ordering(self, db_session_with_containers, patched_external_dependencies):
@@ -565,9 +568,10 @@ class TestDatasetIndexingTaskIntegration:
             _document_indexing_with_tenant_queue(dataset.tenant_id, dataset.id, document_ids, task_dispatch_spy)
 
         # Assert
-        assert task_dispatch_spy.delay.call_count == 3
+        assert task_dispatch_spy.apply_async.call_count == 3
         for index, expected_task in enumerate(ordered_tasks):
-            assert task_dispatch_spy.delay.call_args_list[index].kwargs["document_ids"] == expected_task["document_ids"]
+            call_kwargs = task_dispatch_spy.apply_async.call_args_list[index].kwargs.get("kwargs", {})
+            assert call_kwargs.get("document_ids") == expected_task["document_ids"]
 
     def test_billing_disabled_skips_limit_checks(self, db_session_with_containers, patched_external_dependencies):
         """Skip limit checks when billing feature is disabled."""
diff --git a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py
index 4be1180c73..5dc1f6bee0 100644
--- a/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_document_indexing_task.py
@@ -762,11 +762,12 @@ class TestDocumentIndexingTasks:
         mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
 
         # Verify task function was called for each waiting task
-        assert mock_task_func.delay.call_count == 1
+        assert mock_task_func.apply_async.call_count == 1
 
         # Verify correct parameters for each call
-        calls = mock_task_func.delay.call_args_list
-        assert calls[0][1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
+        calls = mock_task_func.apply_async.call_args_list
+        sent_kwargs = calls[0][1]["kwargs"]
+        assert sent_kwargs == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
 
         # Verify queue is empty after processing (tasks were pulled)
         remaining_tasks = queue.pull_tasks(count=10)  # Pull more than we added
@@ -830,11 +831,15 @@ class TestDocumentIndexingTasks:
             assert updated_document.processing_started_at is not None
 
         # Verify waiting task was still processed despite core processing error
-        mock_task_func.delay.assert_called_once()
+        mock_task_func.apply_async.assert_called_once()
 
         # Verify correct parameters for the call
-        call = mock_task_func.delay.call_args
-        assert call[1] == {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["waiting-doc-1"]}
+        call = mock_task_func.apply_async.call_args
+        assert call[1]["kwargs"] == {
+            "tenant_id": tenant_id,
+            "dataset_id": dataset_id,
+            "document_ids": ["waiting-doc-1"],
+        }
 
         # Verify queue is empty after processing (task was pulled)
         remaining_tasks = queue.pull_tasks(count=10)
@@ -896,9 +901,13 @@ class TestDocumentIndexingTasks:
         mock_external_service_dependencies["indexing_runner_instance"].run.assert_called_once()
 
         # Verify only tenant1's waiting task was processed
-        mock_task_func.delay.assert_called_once()
-        call = mock_task_func.delay.call_args
-        assert call[1] == {"tenant_id": tenant1_id, "dataset_id": dataset1_id, "document_ids": ["tenant1-doc-1"]}
+        mock_task_func.apply_async.assert_called_once()
+        call = mock_task_func.apply_async.call_args
+        assert call[1]["kwargs"] == {
+            "tenant_id": tenant1_id,
+            "dataset_id": dataset1_id,
+            "document_ids": ["tenant1-doc-1"],
+        }
 
         # Verify tenant1's queue is empty
         remaining_tasks1 = queue1.pull_tasks(count=10)
diff --git a/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py
index ef7191299a..f01fcc1742 100644
--- a/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py
+++ b/api/tests/test_containers_integration_tests/tasks/test_rag_pipeline_run_tasks.py
@@ -1,6 +1,6 @@
 import json
 import uuid
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 
 import pytest
 from faker import Faker
@@ -388,8 +388,10 @@ class TestRagPipelineRunTasks:
         # Set the task key to indicate there are waiting tasks (legacy behavior)
         redis_client.set(legacy_task_key, 1, ex=60 * 60)
 
-        # Mock the task function calls
-        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
+        # Mock the Celery group scheduling used by the implementation
+        with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
+            mock_group.return_value.apply_async = MagicMock()
+
             # Act: Execute the priority task with new code but legacy queue data
             rag_pipeline_run_task(file_id, tenant.id)
 
@@ -398,13 +400,14 @@ class TestRagPipelineRunTasks:
             mock_file_service["delete_file"].assert_called_once_with(file_id)
             assert mock_pipeline_generator.call_count == 1
 
-            # Verify waiting tasks were processed, pull 1 task a time by default
-            assert mock_delay.call_count == 1
+            # Verify waiting tasks were processed via group, pull 1 task a time by default
+            assert mock_group.return_value.apply_async.called
 
-            # Verify correct parameters for the call
-            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
-            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
-            assert call_kwargs.get("tenant_id") == tenant.id
+            # Verify correct parameters for the first scheduled job signature
+            jobs = mock_group.call_args.args[0] if mock_group.call_args else []
+            first_kwargs = jobs[0].kwargs if jobs else {}
+            assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == legacy_file_ids[0]
+            assert first_kwargs.get("tenant_id") == tenant.id
 
             # Verify that new code can process legacy queue entries
             # The new TenantIsolatedTaskQueue should be able to read from the legacy format
@@ -446,8 +449,10 @@ class TestRagPipelineRunTasks:
         waiting_file_ids = [str(uuid.uuid4()) for _ in range(3)]
         queue.push_tasks(waiting_file_ids)
 
-        # Mock the task function calls
-        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
+        # Mock the Celery group scheduling used by the implementation
+        with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
+            mock_group.return_value.apply_async = MagicMock()
+
             # Act: Execute the regular task
             rag_pipeline_run_task(file_id, tenant.id)
 
@@ -456,13 +461,14 @@ class TestRagPipelineRunTasks:
             mock_file_service["delete_file"].assert_called_once_with(file_id)
             assert mock_pipeline_generator.call_count == 1
 
-            # Verify waiting tasks were processed, pull 1 task a time by default
-            assert mock_delay.call_count == 1
+            # Verify waiting tasks were processed via group.apply_async
+            assert mock_group.return_value.apply_async.called
 
-            # Verify correct parameters for the call
-            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
-            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
-            assert call_kwargs.get("tenant_id") == tenant.id
+            # Verify correct parameters for the first scheduled job signature
+            jobs = mock_group.call_args.args[0] if mock_group.call_args else []
+            first_kwargs = jobs[0].kwargs if jobs else {}
+            assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_ids[0]
+            assert first_kwargs.get("tenant_id") == tenant.id
 
             # Verify queue still has remaining tasks (only 1 was pulled)
             remaining_tasks = queue.pull_tasks(count=10)
@@ -557,8 +563,10 @@ class TestRagPipelineRunTasks:
         waiting_file_id = str(uuid.uuid4())
         queue.push_tasks([waiting_file_id])
 
-        # Mock the task function calls
-        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
+        # Mock the Celery group scheduling used by the implementation
+        with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
+            mock_group.return_value.apply_async = MagicMock()
+
             # Act: Execute the regular task (should not raise exception)
             rag_pipeline_run_task(file_id, tenant.id)
 
@@ -569,12 +577,13 @@ class TestRagPipelineRunTasks:
             assert mock_pipeline_generator.call_count == 1
 
             # Verify waiting task was still processed despite core processing error
-            mock_delay.assert_called_once()
+            assert mock_group.return_value.apply_async.called
 
-            # Verify correct parameters for the call
-            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
-            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
-            assert call_kwargs.get("tenant_id") == tenant.id
+            # Verify correct parameters for the first scheduled job signature
+            jobs = mock_group.call_args.args[0] if mock_group.call_args else []
+            first_kwargs = jobs[0].kwargs if jobs else {}
+            assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
+            assert first_kwargs.get("tenant_id") == tenant.id
 
             # Verify queue is empty after processing (task was pulled)
             remaining_tasks = queue.pull_tasks(count=10)
@@ -684,8 +693,10 @@ class TestRagPipelineRunTasks:
         queue1.push_tasks([waiting_file_id1])
         queue2.push_tasks([waiting_file_id2])
 
-        # Mock the task function calls
-        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
+        # Mock the Celery group scheduling used by the implementation
+        with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
+            mock_group.return_value.apply_async = MagicMock()
+
             # Act: Execute the regular task for tenant1 only
             rag_pipeline_run_task(file_id1, tenant1.id)
 
@@ -694,11 +705,12 @@ class TestRagPipelineRunTasks:
             assert mock_file_service["delete_file"].call_count == 1
             assert mock_pipeline_generator.call_count == 1
 
-            # Verify only tenant1's waiting task was processed
-            mock_delay.assert_called_once()
-            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
-            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
-            assert call_kwargs.get("tenant_id") == tenant1.id
+            # Verify only tenant1's waiting task was processed (via group)
+            assert mock_group.return_value.apply_async.called
+            jobs = mock_group.call_args.args[0] if mock_group.call_args else []
+            first_kwargs = jobs[0].kwargs if jobs else {}
+            assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id1
+            assert first_kwargs.get("tenant_id") == tenant1.id
 
             # Verify tenant1's queue is empty
             remaining_tasks1 = queue1.pull_tasks(count=10)
@@ -913,8 +925,10 @@ class TestRagPipelineRunTasks:
         waiting_file_id = str(uuid.uuid4())
         queue.push_tasks([waiting_file_id])
 
-        # Mock the task function calls
-        with patch("tasks.rag_pipeline.rag_pipeline_run_task.rag_pipeline_run_task.delay") as mock_delay:
+        # Mock the Celery group scheduling used by the implementation
+        with patch("tasks.rag_pipeline.rag_pipeline_run_task.group") as mock_group:
+            mock_group.return_value.apply_async = MagicMock()
+
             # Act & Assert: Execute the regular task (should raise Exception)
             with pytest.raises(Exception, match="File not found"):
                 rag_pipeline_run_task(file_id, tenant.id)
@@ -924,12 +938,13 @@ class TestRagPipelineRunTasks:
             mock_pipeline_generator.assert_not_called()
 
             # Verify waiting task was still processed despite file error
-            mock_delay.assert_called_once()
+            assert mock_group.return_value.apply_async.called
 
-            # Verify correct parameters for the call
-            call_kwargs = mock_delay.call_args[1] if mock_delay.call_args else {}
-            assert call_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
-            assert call_kwargs.get("tenant_id") == tenant.id
+            # Verify correct parameters for the first scheduled job signature
+            jobs = mock_group.call_args.args[0] if mock_group.call_args else []
+            first_kwargs = jobs[0].kwargs if jobs else {}
+            assert first_kwargs.get("rag_pipeline_invoke_entities_file_id") == waiting_file_id
+            assert first_kwargs.get("tenant_id") == tenant.id
 
             # Verify queue is empty after processing (task was pulled)
             remaining_tasks = queue.pull_tasks(count=10)
diff --git a/api/tests/test_containers_integration_tests/trigger/conftest.py b/api/tests/test_containers_integration_tests/trigger/conftest.py
index 9c1fd5e0ec..e3832fb2ef 100644
--- a/api/tests/test_containers_integration_tests/trigger/conftest.py
+++ b/api/tests/test_containers_integration_tests/trigger/conftest.py
@@ -105,18 +105,26 @@ def app_model(
 
 
 class MockCeleryGroup:
-    """Mock for celery group() function that collects dispatched tasks."""
+    """Mock for celery group() function that collects dispatched tasks.
+
+    Matches the Celery group API loosely, accepting arbitrary kwargs on apply_async
+    (e.g. producer) so production code can pass broker-related options without
+    breaking tests.
+    """
 
     def __init__(self) -> None:
         self.collected: list[dict[str, Any]] = []
         self._applied = False
+        self.last_apply_async_kwargs: dict[str, Any] | None = None
 
     def __call__(self, items: Any) -> MockCeleryGroup:
         self.collected = list(items)
         return self
 
-    def apply_async(self) -> None:
+    def apply_async(self, **kwargs: Any) -> None:
+        # Accept arbitrary kwargs like producer to be compatible with Celery
         self._applied = True
+        self.last_apply_async_kwargs = kwargs
 
     @property
     def applied(self) -> bool:
diff --git a/api/tests/unit_tests/tasks/test_dataset_indexing_task.py b/api/tests/unit_tests/tasks/test_dataset_indexing_task.py
index 11b4663187..67e0a8efaf 100644
--- a/api/tests/unit_tests/tasks/test_dataset_indexing_task.py
+++ b/api/tests/unit_tests/tasks/test_dataset_indexing_task.py
@@ -10,14 +10,23 @@ This module tests the document indexing task functionality including:
 """
 
 import uuid
-from unittest.mock import Mock, patch
+from unittest.mock import MagicMock, Mock, patch
 
 import pytest
 
+from core.indexing_runner import DocumentIsPausedError
 from core.rag.pipeline.queue import TenantIsolatedTaskQueue
 from enums.cloud_plan import CloudPlan
 from extensions.ext_redis import redis_client
+from models.dataset import Dataset, Document
 from services.document_indexing_proxy.document_indexing_task_proxy import DocumentIndexingTaskProxy
+from tasks.document_indexing_task import (
+    _document_indexing,
+    _document_indexing_with_tenant_queue,
+    document_indexing_task,
+    normal_document_indexing_task,
+    priority_document_indexing_task,
+)
 
 # ============================================================================
 # Fixtures
@@ -56,6 +65,190 @@ def mock_redis():
     return redis_client
 
 
+# Additional fixtures required by tests in this module
+
+
+@pytest.fixture
+def mock_db_session():
+    """Mock session_factory.create_session() to return a session whose queries use shared test data.
+
+    Tests set session._shared_data = {"dataset": <Dataset>, "documents": [<Document>, ...]}
+    This fixture makes session.query(Dataset).first() return the shared dataset,
+    and session.query(Document).all()/first() return from the shared documents.
+    """
+    with patch("tasks.document_indexing_task.session_factory") as mock_sf:
+        session = MagicMock()
+        session._shared_data = {"dataset": None, "documents": []}
+
+        # Keep a pointer so repeated Document.first() calls iterate across provided docs
+        session._doc_first_idx = 0
+
+        def _query_side_effect(model):
+            q = MagicMock()
+
+            # Capture filters passed via where(...) so first()/all() can honor them.
+            q._filters = {}
+
+            def _extract_filters(*conds, **kw):
+                # Support both SQLAlchemy expressions (BinaryExpression) and kwargs
+                # We only need the simple fields used by production code: id, dataset_id, and id.in_(...)
+                for cond in conds:
+                    left = getattr(cond, "left", None)
+                    right = getattr(cond, "right", None)
+                    key = None
+                    if left is not None:
+                        key = getattr(left, "key", None) or getattr(left, "name", None)
+                    if not key:
+                        continue
+                    # Right side might be a BindParameter with .value, or a raw value/sequence
+                    val = getattr(right, "value", right)
+                    q._filters[key] = val
+                # Also accept kwargs (e.g., where(id=...)) just in case
+                for k, v in kw.items():
+                    q._filters[k] = v
+
+            def _where_side_effect(*conds, **kw):
+                _extract_filters(*conds, **kw)
+                return q
+
+            q.where.side_effect = _where_side_effect
+
+            # Dataset queries
+            if model.__name__ == "Dataset":
+
+                def _dataset_first():
+                    ds = session._shared_data.get("dataset")
+                    if not ds:
+                        return None
+                    if "id" in q._filters:
+                        val = q._filters["id"]
+                        if isinstance(val, (list, tuple, set)):
+                            return ds if ds.id in val else None
+                        return ds if ds.id == val else None
+                    return ds
+
+                def _dataset_all():
+                    ds = session._shared_data.get("dataset")
+                    if not ds:
+                        return []
+                    first = _dataset_first()
+                    return [first] if first else []
+
+                q.first.side_effect = _dataset_first
+                q.all.side_effect = _dataset_all
+                return q
+
+            # Document queries
+            if model.__name__ == "Document":
+
+                def _apply_doc_filters(docs):
+                    result = list(docs)
+                    for key in ("id", "dataset_id"):
+                        if key in q._filters:
+                            val = q._filters[key]
+                            if isinstance(val, (list, tuple, set)):
+                                result = [d for d in result if getattr(d, key, None) in val]
+                            else:
+                                result = [d for d in result if getattr(d, key, None) == val]
+                    return result
+
+                def _docs_all():
+                    docs = session._shared_data.get("documents", [])
+                    return _apply_doc_filters(docs)
+
+                def _docs_first():
+                    docs = _docs_all()
+                    return docs[0] if docs else None
+
+                q.all.side_effect = _docs_all
+                q.first.side_effect = _docs_first
+                return q
+
+            # Default fallback
+            q.first.return_value = None
+            q.all.return_value = []
+            return q
+
+        session.query.side_effect = _query_side_effect
+
+        # Implement session.begin() context manager that commits on exit
+        session.commit = MagicMock()
+        bm = MagicMock()
+        bm.__enter__.return_value = session
+
+        def _bm_exit_side_effect(*args, **kwargs):
+            session.commit()
+
+        bm.__exit__.side_effect = _bm_exit_side_effect
+        session.begin.return_value = bm
+
+        # Context manager behavior for create_session(): ensure close() is called on exit
+        session.close = MagicMock()
+        cm = MagicMock()
+        cm.__enter__.return_value = session
+
+        def _exit_side_effect(*args, **kwargs):
+            session.close()
+
+        cm.__exit__.side_effect = _exit_side_effect
+        mock_sf.create_session.return_value = cm
+
+        yield session
+
+
+@pytest.fixture
+def mock_dataset(dataset_id, tenant_id):
+    """Create a mock Dataset object."""
+    dataset = Mock(spec=Dataset)
+    dataset.id = dataset_id
+    dataset.tenant_id = tenant_id
+    dataset.indexing_technique = "high_quality"
+    dataset.embedding_model_provider = "openai"
+    dataset.embedding_model = "text-embedding-ada-002"
+    return dataset
+
+
+@pytest.fixture
+def mock_documents(document_ids, dataset_id):
+    """Create mock Document objects."""
+    documents = []
+    for doc_id in document_ids:
+        doc = Mock(spec=Document)
+        doc.id = doc_id
+        doc.dataset_id = dataset_id
+        doc.indexing_status = "waiting"
+        doc.error = None
+        doc.stopped_at = None
+        doc.processing_started_at = None
+        # optional attribute used in some code paths
+        doc.doc_form = "text_model"
+        documents.append(doc)
+    return documents
+
+
+@pytest.fixture
+def mock_indexing_runner():
+    """Mock IndexingRunner for document_indexing_task module."""
+    with patch("tasks.document_indexing_task.IndexingRunner") as mock_runner_class:
+        mock_runner = MagicMock()
+        mock_runner_class.return_value = mock_runner
+        yield mock_runner
+
+
+@pytest.fixture
+def mock_feature_service():
+    """Mock FeatureService for document_indexing_task module."""
+    with patch("tasks.document_indexing_task.FeatureService") as mock_service:
+        mock_features = Mock()
+        mock_features.billing = Mock()
+        mock_features.billing.enabled = False
+        mock_features.vector_space = Mock()
+        mock_features.vector_space.size = 0
+        mock_features.vector_space.limit = 1000
+        mock_service.get_features.return_value = mock_features
+        yield mock_service
+
+
 # ============================================================================
 # Test Task Enqueuing
 # ============================================================================
@@ -166,6 +359,492 @@ class TestTaskEnqueuing:
                 assert mock_redis.lpush.called
                 mock_task.delay.assert_not_called()
 
+    def test_legacy_document_indexing_task_still_works(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner
+    ):
+        """
+        Test that the legacy document_indexing_task function still works.
+
+        This ensures backward compatibility for existing code that may still
+        use the deprecated function.
+        """
+        # Arrange
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            document_indexing_task(dataset_id, document_ids)
+
+            # Assert
+            mock_indexing_runner.run.assert_called_once()
+
+
+# ============================================================================
+# Test Batch Processing
+# ============================================================================
+
+
+class TestBatchProcessing:
+    """Test cases for batch processing of multiple documents."""
+
+    def test_batch_processing_multiple_documents(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test batch processing of multiple documents.
+
+        All documents in the batch should be processed together and their
+        status should be updated to 'parsing'.
+        """
+        # Arrange - Create actual document objects that can be modified
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.error = None
+            doc.stopped_at = None
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - All documents should be set to 'parsing' status
+            for doc in mock_documents:
+                assert doc.indexing_status == "parsing"
+                assert doc.processing_started_at is not None
+
+            # IndexingRunner should be called with all documents
+            mock_indexing_runner.run.assert_called_once()
+            call_args = mock_indexing_runner.run.call_args[0][0]
+            assert len(call_args) == len(document_ids)
+
+    def test_batch_processing_with_limit_check(self, dataset_id, mock_db_session, mock_dataset, mock_feature_service):
+        """
+        Test batch processing respects upload limits.
+
+        When the number of documents exceeds the batch upload limit,
+        an error should be raised and all documents should be marked as error.
+        """
+        # Arrange
+        batch_limit = 10
+        document_ids = [str(uuid.uuid4()) for _ in range(batch_limit + 1)]
+
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.error = None
+            doc.stopped_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        mock_feature_service.get_features.return_value.billing.enabled = True
+        mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL
+        mock_feature_service.get_features.return_value.vector_space.limit = 1000
+        mock_feature_service.get_features.return_value.vector_space.size = 0
+
+        with patch("tasks.document_indexing_task.dify_config.BATCH_UPLOAD_LIMIT", str(batch_limit)):
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - All documents should have error status
+            for doc in mock_documents:
+                assert doc.indexing_status == "error"
+                assert doc.error is not None
+                assert "batch upload limit" in doc.error
+
+    def test_batch_processing_sandbox_plan_single_document_only(
+        self, dataset_id, mock_db_session, mock_dataset, mock_feature_service
+    ):
+        """
+        Test that sandbox plan only allows single document upload.
+
+        Sandbox plan should reject batch uploads (more than 1 document).
+        """
+        # Arrange
+        document_ids = [str(uuid.uuid4()) for _ in range(2)]
+
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.error = None
+            doc.stopped_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        mock_feature_service.get_features.return_value.billing.enabled = True
+        mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.SANDBOX
+        mock_feature_service.get_features.return_value.vector_space.limit = 1000
+        mock_feature_service.get_features.return_value.vector_space.size = 0
+
+        # Act
+        _document_indexing(dataset_id, document_ids)
+
+        # Assert - All documents should have error status
+        for doc in mock_documents:
+            assert doc.indexing_status == "error"
+            assert "does not support batch upload" in doc.error
+
+    def test_batch_processing_empty_document_list(
+        self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test batch processing with empty document list.
+
+        Should handle empty list gracefully without errors.
+        """
+        # Arrange
+        document_ids = []
+
+        # Set shared mock data with empty documents list
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = []
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - IndexingRunner should still be called with empty list
+            mock_indexing_runner.run.assert_called_once_with([])
+
+
+# ============================================================================
+# Test Progress Tracking
+# ============================================================================
+
+
+class TestProgressTracking:
+    """Test cases for progress tracking through task lifecycle."""
+
+    def test_document_status_progression(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test document status progresses correctly through lifecycle.
+
+        Documents should transition from 'waiting' -> 'parsing' -> processed.
+        """
+        # Arrange - Create actual document objects
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - Status should be 'parsing'
+            for doc in mock_documents:
+                assert doc.indexing_status == "parsing"
+                assert doc.processing_started_at is not None
+
+            # Verify commit was called to persist status
+            assert mock_db_session.commit.called
+
+    def test_processing_started_timestamp_set(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test that processing_started_at timestamp is set correctly.
+
+        When documents start processing, the timestamp should be recorded.
+        """
+        # Arrange - Create actual document objects
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert
+            for doc in mock_documents:
+                assert doc.processing_started_at is not None
+
+    def test_tenant_queue_processes_next_task_after_completion(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test that tenant queue processes next waiting task after completion.
+
+        After a task completes, the system should check for waiting tasks
+        and process the next one.
+        """
+        # Arrange
+        next_task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["next_doc_id"]}
+
+        # Simulate next task in queue
+        from core.rag.pipeline.queue import TaskWrapper
+
+        wrapper = TaskWrapper(data=next_task_data)
+        mock_redis.rpop.return_value = wrapper.serialize()
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert - Next task should be enqueued
+                mock_task.apply_async.assert_called()
+                # Task key should be set for next task
+                assert mock_redis.setex.called
+
+    def test_tenant_queue_clears_flag_when_no_more_tasks(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test that tenant queue clears flag when no more tasks are waiting.
+
+        When there are no more tasks in the queue, the task key should be deleted.
+        """
+        # Arrange
+        mock_redis.rpop.return_value = None  # No more tasks
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert - Task key should be deleted
+                assert mock_redis.delete.called
+
+
+# ============================================================================
+# Test Error Handling and Retries
+# ============================================================================
+
+
+class TestErrorHandling:
+    """Test cases for error handling and retry mechanisms."""
+
+    def test_error_handling_sets_document_error_status(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_feature_service
+    ):
+        """
+        Test that errors during validation set document error status.
+
+        When validation fails (e.g., limit exceeded), documents should be
+        marked with error status and error message.
+        """
+        # Arrange - Create actual document objects
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.error = None
+            doc.stopped_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Set up to trigger vector space limit error
+        mock_feature_service.get_features.return_value.billing.enabled = True
+        mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL
+        mock_feature_service.get_features.return_value.vector_space.limit = 100
+        mock_feature_service.get_features.return_value.vector_space.size = 100  # At limit
+
+        # Act
+        _document_indexing(dataset_id, document_ids)
+
+        # Assert
+        for doc in mock_documents:
+            assert doc.indexing_status == "error"
+            assert doc.error is not None
+            assert "over the limit" in doc.error
+            assert doc.stopped_at is not None
+
+    def test_error_handling_during_indexing_runner(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner
+    ):
+        """
+        Test error handling when IndexingRunner raises an exception.
+
+        Errors during indexing should be caught and logged, but not crash the task.
+        """
+        # Arrange
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Make IndexingRunner raise an exception
+        mock_indexing_runner.run.side_effect = Exception("Indexing failed")
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act - Should not raise exception
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - Session should be closed even after error
+            assert mock_db_session.close.called
+
+    def test_document_paused_error_handling(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_documents, mock_indexing_runner
+    ):
+        """
+        Test handling of DocumentIsPausedError.
+
+        When a document is paused, the error should be caught and logged
+        but not treated as a failure.
+        """
+        # Arrange
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Make IndexingRunner raise DocumentIsPausedError
+        mock_indexing_runner.run.side_effect = DocumentIsPausedError("Document is paused")
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act - Should not raise exception
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - Session should be closed
+            assert mock_db_session.close.called
+
+    def test_dataset_not_found_error_handling(self, dataset_id, document_ids, mock_db_session):
+        """
+        Test handling when dataset is not found.
+
+        If the dataset doesn't exist, the task should exit gracefully.
+        """
+        # Arrange
+        mock_db_session.query.return_value.where.return_value.first.return_value = None
+
+        # Act
+        _document_indexing(dataset_id, document_ids)
+
+        # Assert - Session should be closed
+        assert mock_db_session.close.called
+
+    def test_tenant_queue_error_handling_still_processes_next_task(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test that errors don't prevent processing next task in tenant queue.
+
+        Even if the current task fails, the next task should still be processed.
+        """
+        # Arrange
+        next_task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": ["next_doc_id"]}
+
+        from core.rag.pipeline.queue import TaskWrapper
+
+        wrapper = TaskWrapper(data=next_task_data)
+        # Set up rpop to return task once for concurrency check
+        mock_redis.rpop.side_effect = [wrapper.serialize(), None]
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        # Make _document_indexing raise an error
+        with patch("tasks.document_indexing_task._document_indexing") as mock_indexing:
+            mock_indexing.side_effect = Exception("Processing failed")
+
+            # Patch logger to avoid format string issue in actual code
+            with patch("tasks.document_indexing_task.logger"):
+                with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                    # Act
+                    _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                    # Assert - Next task should still be enqueued despite error
+                    mock_task.apply_async.assert_called()
+
+    def test_concurrent_task_limit_respected(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset
+    ):
+        """
+        Test that tenant isolated task concurrency limit is respected.
+
+        Should pull only TENANT_ISOLATED_TASK_CONCURRENCY tasks at a time.
+        """
+        # Arrange
+        concurrency_limit = 2
+
+        # Create multiple tasks in queue
+        tasks = []
+        for i in range(5):
+            task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [f"doc_{i}"]}
+            from core.rag.pipeline.queue import TaskWrapper
+
+            wrapper = TaskWrapper(data=task_data)
+            tasks.append(wrapper.serialize())
+
+        # Mock rpop to return tasks one by one
+        mock_redis.rpop.side_effect = tasks[:concurrency_limit] + [None]
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit):
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert - Should enqueue exactly concurrency_limit tasks
+                assert mock_task.apply_async.call_count == concurrency_limit
+
 
 # ============================================================================
 # Test Task Cancellation
@@ -198,6 +877,407 @@ class TestTaskCancellation:
         assert tenant_2 in queue_2._queue
 
 
+# ============================================================================
+# Integration Tests
+# ============================================================================
+
+
+class TestAdvancedScenarios:
+    """Advanced test scenarios for edge cases and complex workflows."""
+
+    def test_multiple_documents_with_mixed_success_and_failure(
+        self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test handling of mixed success and failure scenarios in batch processing.
+
+        When processing multiple documents, some may succeed while others fail.
+        This tests that the system handles partial failures gracefully.
+
+        Scenario:
+        - Process 3 documents in a batch
+        - First document succeeds
+        - Second document is not found (skipped)
+        - Third document succeeds
+
+        Expected behavior:
+        - Only found documents are processed
+        - Missing documents are skipped without crashing
+        - IndexingRunner receives only valid documents
+        """
+        # Arrange - Create document IDs with one missing
+        document_ids = [str(uuid.uuid4()) for _ in range(3)]
+
+        # Create only 2 documents (simulate one missing)
+        # The new code uses .all() which will only return existing documents
+        mock_documents = []
+        for i, doc_id in enumerate([document_ids[0], document_ids[2]]):  # Skip middle one
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data - .all() will only return existing documents
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert - Only 2 documents should be processed (missing one skipped)
+            mock_indexing_runner.run.assert_called_once()
+            call_args = mock_indexing_runner.run.call_args[0][0]
+            assert len(call_args) == 2  # Only found documents
+
+    def test_tenant_queue_with_multiple_concurrent_tasks(
+        self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset
+    ):
+        """
+        Test concurrent task processing with tenant isolation.
+
+        This tests the scenario where multiple tasks are queued for the same tenant
+        and need to be processed respecting the concurrency limit.
+
+        Scenario:
+        - 5 tasks are waiting in the queue
+        - Concurrency limit is 2
+        - After current task completes, pull and enqueue next 2 tasks
+
+        Expected behavior:
+        - Exactly 2 tasks are pulled from queue (respecting concurrency)
+        - Each task is enqueued with correct parameters
+        - Task waiting time is set for each new task
+        """
+        # Arrange
+        concurrency_limit = 2
+        document_ids = [str(uuid.uuid4())]
+
+        # Create multiple waiting tasks
+        waiting_tasks = []
+        for i in range(5):
+            task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [f"doc_{i}"]}
+            from core.rag.pipeline.queue import TaskWrapper
+
+            wrapper = TaskWrapper(data=task_data)
+            waiting_tasks.append(wrapper.serialize())
+
+        # Mock rpop to return tasks up to concurrency limit
+        mock_redis.rpop.side_effect = waiting_tasks[:concurrency_limit] + [None]
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit):
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert
+                # Should enqueue exactly concurrency_limit tasks
+                assert mock_task.apply_async.call_count == concurrency_limit
+
+                # Verify task waiting time was set for each task
+                assert mock_redis.setex.call_count >= concurrency_limit
+
+    def test_vector_space_limit_edge_case_at_exact_limit(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_feature_service
+    ):
+        """
+        Test vector space limit validation at exact boundary.
+
+        Edge case: When vector space is exactly at the limit (not over),
+        the upload should still be rejected.
+
+        Scenario:
+        - Vector space limit: 100
+        - Current size: 100 (exactly at limit)
+        - Try to upload 3 documents
+
+        Expected behavior:
+        - Upload is rejected with appropriate error message
+        - All documents are marked with error status
+        """
+        # Arrange
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.error = None
+            doc.stopped_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Set vector space exactly at limit
+        mock_feature_service.get_features.return_value.billing.enabled = True
+        mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL
+        mock_feature_service.get_features.return_value.vector_space.limit = 100
+        mock_feature_service.get_features.return_value.vector_space.size = 100  # Exactly at limit
+
+        # Act
+        _document_indexing(dataset_id, document_ids)
+
+        # Assert - All documents should have error status
+        for doc in mock_documents:
+            assert doc.indexing_status == "error"
+            assert "over the limit" in doc.error
+
+    def test_task_queue_fifo_ordering(self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset):
+        """
+        Test that tasks are processed in FIFO (First-In-First-Out) order.
+
+        The tenant isolated queue should maintain task order, ensuring
+        that tasks are processed in the sequence they were added.
+
+        Scenario:
+        - Task A added first
+        - Task B added second
+        - Task C added third
+        - When pulling tasks, should get A, then B, then C
+
+        Expected behavior:
+        - Tasks are retrieved in the order they were added
+        - FIFO ordering is maintained throughout processing
+        """
+        # Arrange
+        document_ids = [str(uuid.uuid4())]
+
+        # Create tasks with identifiable document IDs to track order
+        task_order = ["task_A", "task_B", "task_C"]
+        tasks = []
+        for task_name in task_order:
+            task_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": [task_name]}
+            from core.rag.pipeline.queue import TaskWrapper
+
+            wrapper = TaskWrapper(data=task_data)
+            tasks.append(wrapper.serialize())
+
+        # Mock rpop to return tasks in FIFO order
+        mock_redis.rpop.side_effect = tasks + [None]
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", 3):
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert - Verify tasks were enqueued in correct order
+                assert mock_task.apply_async.call_count == 3
+
+                # Check that document_ids in calls match expected order
+                for i, call_obj in enumerate(mock_task.apply_async.call_args_list):
+                    called_doc_ids = call_obj[1]["kwargs"]["document_ids"]
+                    assert called_doc_ids == [task_order[i]]
+
+    def test_empty_queue_after_task_completion_cleans_up(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset
+    ):
+        """
+        Test cleanup behavior when queue becomes empty after task completion.
+
+        After processing the last task in the queue, the system should:
+        1. Detect that no more tasks are waiting
+        2. Delete the task key to indicate tenant is idle
+        3. Allow new tasks to start fresh processing
+
+        Scenario:
+        - Process a task
+        - Check queue for next tasks
+        - Queue is empty
+        - Task key should be deleted
+
+        Expected behavior:
+        - Task key is deleted when queue is empty
+        - Tenant is marked as idle (no active tasks)
+        """
+        # Arrange
+        mock_redis.rpop.return_value = None  # Empty queue
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+            # Act
+            _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+            # Assert
+            # Verify delete was called to clean up task key
+            mock_redis.delete.assert_called_once()
+
+            # Verify the correct key was deleted (contains tenant_id and "document_indexing")
+            delete_call_args = mock_redis.delete.call_args[0][0]
+            assert tenant_id in delete_call_args
+            assert "document_indexing" in delete_call_args
+
+    def test_billing_disabled_skips_limit_checks(
+        self, dataset_id, document_ids, mock_db_session, mock_dataset, mock_indexing_runner, mock_feature_service
+    ):
+        """
+        Test that billing limit checks are skipped when billing is disabled.
+
+        For self-hosted or enterprise deployments where billing is disabled,
+        the system should not enforce vector space or batch upload limits.
+
+        Scenario:
+        - Billing is disabled
+        - Upload 100 documents (would normally exceed limits)
+        - No limit checks should be performed
+
+        Expected behavior:
+        - Documents are processed without limit validation
+        - No errors related to limits
+        - All documents proceed to indexing
+        """
+        # Arrange - Create many documents
+        large_batch_ids = [str(uuid.uuid4()) for _ in range(100)]
+
+        mock_documents = []
+        for doc_id in large_batch_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Billing disabled - limits should not be checked
+        mock_feature_service.get_features.return_value.billing.enabled = False
+
+        # Act
+        _document_indexing(dataset_id, large_batch_ids)
+
+        # Assert
+        # All documents should be set to parsing (no limit errors)
+        for doc in mock_documents:
+            assert doc.indexing_status == "parsing"
+
+        # IndexingRunner should be called with all documents
+        mock_indexing_runner.run.assert_called_once()
+        call_args = mock_indexing_runner.run.call_args[0][0]
+        assert len(call_args) == 100
+
+
+class TestIntegration:
+    """Integration tests for complete task workflows."""
+
+    def test_complete_workflow_normal_task(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test complete workflow for normal document indexing task.
+
+        This tests the full flow from task receipt to completion.
+        """
+        # Arrange - Create actual document objects
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set up rpop to return None for concurrency check (no more tasks)
+        mock_redis.rpop.side_effect = [None]
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            normal_document_indexing_task(tenant_id, dataset_id, document_ids)
+
+            # Assert
+            # Documents should be processed
+            mock_indexing_runner.run.assert_called_once()
+            # Session should be closed
+            assert mock_db_session.close.called
+            # Task key should be deleted (no more tasks)
+            assert mock_redis.delete.called
+
+    def test_complete_workflow_priority_task(
+        self, tenant_id, dataset_id, document_ids, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test complete workflow for priority document indexing task.
+
+        Priority tasks should follow the same flow as normal tasks.
+        """
+        # Arrange - Create actual document objects
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set up rpop to return None for concurrency check (no more tasks)
+        mock_redis.rpop.side_effect = [None]
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            # Act
+            priority_document_indexing_task(tenant_id, dataset_id, document_ids)
+
+            # Assert
+            mock_indexing_runner.run.assert_called_once()
+            assert mock_db_session.close.called
+            assert mock_redis.delete.called
+
+    def test_queue_chain_processing(
+        self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset, mock_indexing_runner
+    ):
+        """
+        Test that multiple tasks in queue are processed in sequence.
+
+        When tasks are queued, they should be processed one after another.
+        """
+        # Arrange
+        task_1_docs = [str(uuid.uuid4())]
+        task_2_docs = [str(uuid.uuid4())]
+
+        task_2_data = {"tenant_id": tenant_id, "dataset_id": dataset_id, "document_ids": task_2_docs}
+
+        from core.rag.pipeline.queue import TaskWrapper
+
+        wrapper = TaskWrapper(data=task_2_data)
+
+        # First call returns task 2, second call returns None
+        mock_redis.rpop.side_effect = [wrapper.serialize(), None]
+
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.FeatureService.get_features") as mock_features:
+            mock_features.return_value.billing.enabled = False
+
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act - Process first task
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, task_1_docs, mock_task)
+
+                # Assert - Second task should be enqueued
+                assert mock_task.apply_async.called
+                call_args = mock_task.apply_async.call_args
+                assert call_args[1]["kwargs"]["document_ids"] == task_2_docs
+
+
 # ============================================================================
 # Additional Edge Case Tests
 # ============================================================================
@@ -249,6 +1329,107 @@ class TestEdgeCases:
 class TestPerformanceScenarios:
     """Test performance-related scenarios and optimizations."""
 
+    def test_large_document_batch_processing(
+        self, dataset_id, mock_db_session, mock_dataset, mock_indexing_runner, mock_feature_service
+    ):
+        """
+        Test processing a large batch of documents at batch limit.
+
+        When processing the maximum allowed batch size, the system
+        should handle it efficiently without errors.
+
+        Scenario:
+        - Process exactly batch_upload_limit documents (e.g., 50)
+        - All documents are valid
+        - Billing is enabled
+
+        Expected behavior:
+        - All documents are processed successfully
+        - No timeout or memory issues
+        - Batch limit is not exceeded
+        """
+        # Arrange
+        batch_limit = 50
+        document_ids = [str(uuid.uuid4()) for _ in range(batch_limit)]
+
+        mock_documents = []
+        for doc_id in document_ids:
+            doc = MagicMock(spec=Document)
+            doc.id = doc_id
+            doc.dataset_id = dataset_id
+            doc.indexing_status = "waiting"
+            doc.processing_started_at = None
+            mock_documents.append(doc)
+
+        # Set shared mock data so all sessions can access it
+        mock_db_session._shared_data["dataset"] = mock_dataset
+        mock_db_session._shared_data["documents"] = mock_documents
+
+        # Configure billing with sufficient limits
+        mock_feature_service.get_features.return_value.billing.enabled = True
+        mock_feature_service.get_features.return_value.billing.subscription.plan = CloudPlan.PROFESSIONAL
+        mock_feature_service.get_features.return_value.vector_space.limit = 10000
+        mock_feature_service.get_features.return_value.vector_space.size = 0
+
+        with patch("tasks.document_indexing_task.dify_config.BATCH_UPLOAD_LIMIT", str(batch_limit)):
+            # Act
+            _document_indexing(dataset_id, document_ids)
+
+            # Assert
+            for doc in mock_documents:
+                assert doc.indexing_status == "parsing"
+
+            mock_indexing_runner.run.assert_called_once()
+            call_args = mock_indexing_runner.run.call_args[0][0]
+            assert len(call_args) == batch_limit
+
+    def test_tenant_queue_handles_burst_traffic(self, tenant_id, dataset_id, mock_redis, mock_db_session, mock_dataset):
+        """
+        Test tenant queue handling burst traffic scenarios.
+
+        When many tasks arrive in a burst for the same tenant,
+        the queue should handle them efficiently without dropping tasks.
+
+        Scenario:
+        - 20 tasks arrive rapidly
+        - Concurrency limit is 3
+        - Tasks should be queued and processed in batches
+
+        Expected behavior:
+        - First 3 tasks are processed immediately
+        - Remaining tasks wait in queue
+        - No tasks are lost
+        """
+        # Arrange
+        num_tasks = 20
+        concurrency_limit = 3
+        document_ids = [str(uuid.uuid4())]
+
+        # Create waiting tasks
+        waiting_tasks = []
+        for i in range(num_tasks):
+            task_data = {
+                "tenant_id": tenant_id,
+                "dataset_id": dataset_id,
+                "document_ids": [f"doc_{i}"],
+            }
+            from core.rag.pipeline.queue import TaskWrapper
+
+            wrapper = TaskWrapper(data=task_data)
+            waiting_tasks.append(wrapper.serialize())
+
+        # Mock rpop to return tasks up to concurrency limit
+        mock_redis.rpop.side_effect = waiting_tasks[:concurrency_limit] + [None]
+        mock_db_session.query.return_value.where.return_value.first.return_value = mock_dataset
+
+        with patch("tasks.document_indexing_task.dify_config.TENANT_ISOLATED_TASK_CONCURRENCY", concurrency_limit):
+            with patch("tasks.document_indexing_task.normal_document_indexing_task") as mock_task:
+                # Act
+                _document_indexing_with_tenant_queue(tenant_id, dataset_id, document_ids, mock_task)
+
+                # Assert - Should process exactly concurrency_limit tasks
+                assert mock_task.apply_async.call_count == concurrency_limit
+
     def test_multiple_tenants_isolated_processing(self, mock_redis):
         """
         Test that multiple tenants process tasks in isolation.