diff --git a/api/dify_graph/nodes/http_request/entities.py b/api/dify_graph/nodes/http_request/entities.py
index f594d58ae6..da56dd0242 100644
--- a/api/dify_graph/nodes/http_request/entities.py
+++ b/api/dify_graph/nodes/http_request/entities.py
@@ -157,6 +157,25 @@ class Response:
             ):
                 return False
 
+            # Known binary markers (substring match on Content-Type) — trust the header, skip heuristic byte-sampling
+            known_binary_subtypes = (
+                "pdf",
+                "zip",
+                "gzip",
+                "x-gzip",
+                "octet-stream",
+                "msword",
+                "vnd.openxmlformats",
+                "vnd.ms-excel",
+                "vnd.ms-powerpoint",
+                "x-tar",
+                "x-rar",
+                "x-7z-compressed",
+                "wasm",
+            )
+            if any(subtype in content_type for subtype in known_binary_subtypes):
+                return True
+
             # Try to detect if content is text-based by sampling first few bytes
             try:
                 # Sample first 1024 bytes for text detection
diff --git a/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py b/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py
index fec6ad90eb..a21acc1d75 100644
--- a/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py
+++ b/api/tests/unit_tests/core/workflow/nodes/http_request/test_entities.py
@@ -231,3 +231,40 @@ def test_text_property_with_escaped_unicode(mock_response, json_content, descrip
     # The text should be valid JSON that can be parsed back to proper Unicode
     parsed = json.loads(response.text)
     assert isinstance(parsed, dict), f"Invalid JSON for {description}"
+
+
+@pytest.mark.parametrize(
+    "content_type",
+    [
+        "application/pdf",
+        "application/zip",
+        "application/gzip",
+        "application/x-gzip",
+        "application/octet-stream",
+        "application/msword",
+        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+        "application/vnd.ms-excel",
+        "application/vnd.ms-powerpoint",
+        "application/x-tar",
+        "application/x-rar",
+        "application/x-7z-compressed",
+        "application/wasm",
+    ],
+)
+def test_known_binary_types_skip_heuristic(mock_response, content_type):
+    """Known binary MIME types must be reported as files even when the body is UTF-8 decodable.
+
+    Regression test for https://github.com/langgenius/dify/issues/33897: PDFs without a binary
+    marker comment (e.g. Copper PDF) are UTF-8 decodable and contain '<' in dictionary syntax
+    (<< /Type /Catalog), which previously tripped the text_markers heuristic so the response was
+    misidentified as text. The same payload is reused for every parametrized Content-Type.
+    """
+    # Minimal PDF-like body with no binary marker line: pure ASCII (valid UTF-8) and contains '<'
+    pdf_content = b"%PDF-1.5\r\n1 0 obj\r\n<< /Type /Catalog /Pages 2 0 R >>\r\nendobj\r\n"
+    mock_response.headers = {"content-type": content_type}
+    type(mock_response).content = PropertyMock(return_value=pdf_content)  # property mocks must be set on the type
+    response = Response(mock_response)
+    assert response.is_file, (
+        f"Known binary type {content_type} should be identified as a file "
+        "even when content is UTF-8 decodable and contains text markers"
+    )