feat: apply markdown rendering to HITL email, sanitize email subject and body (#32305)

This PR:

1. Fixes the bug that email body of `HumanInput` node are sent as-is, without markdown rendering or sanitization
2. Applies HTML sanitization to email subject and body
3. Removes `\r` and `\n` from email subject to prevent SMTP header injection

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Blackoutta 2026-03-16 16:52:46 +08:00 committed by GitHub
parent 4822d550b6
commit 57d476d4e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 229 additions and 9 deletions

View File

@ -8,6 +8,8 @@ from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta from datetime import datetime, timedelta
from typing import Annotated, Any, ClassVar, Literal, Self from typing import Annotated, Any, ClassVar, Literal, Self
import bleach
import markdown
from pydantic import BaseModel, Field, field_validator, model_validator from pydantic import BaseModel, Field, field_validator, model_validator
from dify_graph.entities.base_node_data import BaseNodeData from dify_graph.entities.base_node_data import BaseNodeData
@ -58,6 +60,39 @@ class EmailDeliveryConfig(BaseModel):
"""Configuration for email delivery method.""" """Configuration for email delivery method."""
URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}" URL_PLACEHOLDER: ClassVar[str] = "{{#url#}}"
_SUBJECT_NEWLINE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(r"[\r\n]+")
_ALLOWED_HTML_TAGS: ClassVar[list[str]] = [
"a",
"blockquote",
"br",
"code",
"em",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"hr",
"li",
"ol",
"p",
"pre",
"strong",
"table",
"tbody",
"td",
"th",
"thead",
"tr",
"ul",
]
_ALLOWED_HTML_ATTRIBUTES: ClassVar[dict[str, list[str]]] = {
"a": ["href", "title"],
"td": ["align"],
"th": ["align"],
}
_ALLOWED_PROTOCOLS: ClassVar[list[str]] = ["http", "https", "mailto"]
recipients: EmailRecipients recipients: EmailRecipients
@ -98,6 +133,43 @@ class EmailDeliveryConfig(BaseModel):
return templated_body return templated_body
return variable_pool.convert_template(templated_body).text return variable_pool.convert_template(templated_body).text
@classmethod
def render_markdown_body(cls, body: str) -> str:
"""Render markdown to safe HTML for email delivery."""
sanitized_markdown = bleach.clean(
body,
tags=[],
attributes={},
strip=True,
strip_comments=True,
)
rendered_html = markdown.markdown(
sanitized_markdown,
extensions=["nl2br", "tables"],
extension_configs={"tables": {"use_align_attribute": True}},
)
return bleach.clean(
rendered_html,
tags=cls._ALLOWED_HTML_TAGS,
attributes=cls._ALLOWED_HTML_ATTRIBUTES,
protocols=cls._ALLOWED_PROTOCOLS,
strip=True,
strip_comments=True,
)
@classmethod
def sanitize_subject(cls, subject: str) -> str:
"""Sanitize email subject to plain text and prevent CRLF injection."""
sanitized_subject = bleach.clean(
subject,
tags=[],
attributes={},
strip=True,
strip_comments=True,
)
sanitized_subject = cls._SUBJECT_NEWLINE_PATTERN.sub(" ", sanitized_subject)
return " ".join(sanitized_subject.split())
class _DeliveryMethodBase(BaseModel): class _DeliveryMethodBase(BaseModel):
"""Base delivery method configuration.""" """Base delivery method configuration."""

View File

@ -40,7 +40,7 @@ dependencies = [
"numpy~=1.26.4", "numpy~=1.26.4",
"openpyxl~=3.1.5", "openpyxl~=3.1.5",
"opik~=1.10.37", "opik~=1.10.37",
"litellm==1.82.2", # Pinned to avoid madoka dependency issue "litellm==1.82.2", # Pinned to avoid madoka dependency issue
"opentelemetry-api==1.28.0", "opentelemetry-api==1.28.0",
"opentelemetry-distro==0.49b0", "opentelemetry-distro==0.49b0",
"opentelemetry-exporter-otlp==1.28.0", "opentelemetry-exporter-otlp==1.28.0",
@ -91,6 +91,7 @@ dependencies = [
"apscheduler>=3.11.0", "apscheduler>=3.11.0",
"weave>=0.52.16", "weave>=0.52.16",
"fastopenapi[flask]>=0.7.0", "fastopenapi[flask]>=0.7.0",
"bleach~=6.2.0",
] ]
# Before adding new dependency, consider place it in # Before adding new dependency, consider place it in
# alphabet order (a-z) and suitable group. # alphabet order (a-z) and suitable group.
@ -251,10 +252,7 @@ ignore_errors = true
[tool.pyrefly] [tool.pyrefly]
project-includes = ["."] project-includes = ["."]
project-excludes = [ project-excludes = [".venv", "migrations/"]
".venv",
"migrations/",
]
python-platform = "linux" python-platform = "linux"
python-version = "3.11.0" python-version = "3.11.0"
infer-with-first-use = false infer-with-first-use = false

View File

@ -155,13 +155,15 @@ class EmailDeliveryTestHandler:
context=context, context=context,
recipient_email=recipient_email, recipient_email=recipient_email,
) )
subject = render_email_template(method.config.subject, substitutions) subject_template = render_email_template(method.config.subject, substitutions)
subject = EmailDeliveryConfig.sanitize_subject(subject_template)
templated_body = EmailDeliveryConfig.render_body_template( templated_body = EmailDeliveryConfig.render_body_template(
body=method.config.body, body=method.config.body,
url=substitutions.get("form_link"), url=substitutions.get("form_link"),
variable_pool=context.variable_pool, variable_pool=context.variable_pool,
) )
body = render_email_template(templated_body, substitutions) body = render_email_template(templated_body, substitutions)
body = EmailDeliveryConfig.render_markdown_body(body)
mail.send( mail.send(
to=recipient_email, to=recipient_email,

View File

@ -111,7 +111,7 @@ def _render_body(
url=form_link, url=form_link,
variable_pool=variable_pool, variable_pool=variable_pool,
) )
return body return EmailDeliveryConfig.render_markdown_body(body)
def _load_variable_pool(workflow_run_id: str | None) -> VariablePool | None: def _load_variable_pool(workflow_run_id: str | None) -> VariablePool | None:
@ -173,10 +173,11 @@ def dispatch_human_input_email_task(form_id: str, node_title: str | None = None,
for recipient in job.recipients: for recipient in job.recipients:
form_link = _build_form_link(recipient.token) form_link = _build_form_link(recipient.token)
body = _render_body(job.body, form_link, variable_pool=variable_pool) body = _render_body(job.body, form_link, variable_pool=variable_pool)
subject = EmailDeliveryConfig.sanitize_subject(job.subject)
mail.send( mail.send(
to=recipient.email, to=recipient.email,
subject=job.subject, subject=subject,
html=body, html=body,
) )

View File

@ -14,3 +14,64 @@ def test_render_body_template_replaces_variable_values():
result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool) result = config.render_body_template(body=config.body, url="https://example.com", variable_pool=variable_pool)
assert result == "Hello World https://example.com" assert result == "Hello World https://example.com"
def test_render_markdown_body_renders_markdown_to_html():
rendered = EmailDeliveryConfig.render_markdown_body("**Bold** and [link](https://example.com)")
assert "<strong>Bold</strong>" in rendered
assert '<a href="https://example.com">link</a>' in rendered
def test_render_markdown_body_sanitizes_unsafe_html():
rendered = EmailDeliveryConfig.render_markdown_body(
'<script>alert("xss")</script><a href="javascript:alert(1)" onclick="alert(2)">Click</a>'
)
assert "<script" not in rendered
assert "<a" not in rendered
assert "onclick" not in rendered
assert "javascript:" not in rendered
assert "Click" in rendered
def test_render_markdown_body_sanitizes_markdown_link_with_javascript_href():
rendered = EmailDeliveryConfig.render_markdown_body("[bad](javascript:alert(1)) and [ok](https://example.com)")
assert "javascript:" not in rendered
assert "<a>bad</a>" in rendered
assert '<a href="https://example.com">ok</a>' in rendered
def test_render_markdown_body_does_not_allow_raw_html_tags():
rendered = EmailDeliveryConfig.render_markdown_body("<b>raw html</b> and **markdown**")
assert "<b>" not in rendered
assert "raw html" in rendered
assert "<strong>markdown</strong>" in rendered
def test_render_markdown_body_supports_table_syntax():
rendered = EmailDeliveryConfig.render_markdown_body("| h1 | h2 |\n| --- | ---: |\n| v1 | v2 |")
assert "<table>" in rendered
assert "<thead>" in rendered
assert "<tbody>" in rendered
assert 'align="right"' in rendered
assert "style=" not in rendered
def test_sanitize_subject_removes_crlf():
sanitized = EmailDeliveryConfig.sanitize_subject("Notice\r\nBCC:attacker@example.com")
assert "\r" not in sanitized
assert "\n" not in sanitized
assert sanitized == "Notice BCC:attacker@example.com"
def test_sanitize_subject_removes_html_tags():
sanitized = EmailDeliveryConfig.sanitize_subject("<b>Alert</b><img src=x onerror=1>")
assert "<" not in sanitized
assert ">" not in sanitized
assert sanitized == "Alert"

View File

@ -207,6 +207,45 @@ class TestEmailDeliveryTestHandler:
assert kwargs["to"] == "test@example.com" assert kwargs["to"] == "test@example.com"
assert "RENDERED_Subj" in kwargs["subject"] assert "RENDERED_Subj" in kwargs["subject"]
def test_send_test_sanitizes_subject(self, monkeypatch):
monkeypatch.setattr(
service_module.FeatureService,
"get_features",
lambda _id: SimpleNamespace(human_input_email_delivery_enabled=True),
)
monkeypatch.setattr(service_module.mail, "is_inited", lambda: True)
mock_mail_send = MagicMock()
monkeypatch.setattr(service_module.mail, "send", mock_mail_send)
monkeypatch.setattr(
service_module,
"render_email_template",
lambda template, substitutions: template.replace("{{ recipient_email }}", substitutions["recipient_email"]),
)
handler = EmailDeliveryTestHandler(session_factory=MagicMock())
handler._resolve_recipients = MagicMock(return_value=["test@example.com"])
context = DeliveryTestContext(
tenant_id="t1",
app_id="a1",
node_id="n1",
node_title="title",
rendered_content="content",
recipients=[DeliveryTestEmailRecipient(email="test@example.com", form_token="token123")],
)
method = EmailDeliveryMethod(
config=EmailDeliveryConfig(
recipients=EmailRecipients(whole_workspace=False, items=[]),
subject="<b>Notice</b>\r\nBCC:{{ recipient_email }}",
body="Body",
)
)
handler.send_test(context=context, method=method)
_, kwargs = mock_mail_send.call_args
assert kwargs["subject"] == "Notice BCC:test@example.com"
def test_resolve_recipients(self): def test_resolve_recipients(self):
handler = EmailDeliveryTestHandler(session_factory=MagicMock()) handler = EmailDeliveryTestHandler(session_factory=MagicMock())

View File

@ -120,4 +120,37 @@ def test_dispatch_human_input_email_task_replaces_body_variables(monkeypatch: py
session_factory=lambda: _DummySession(form), session_factory=lambda: _DummySession(form),
) )
assert mail.sent[0]["html"] == "Body OK" assert mail.sent[0]["html"] == "<p>Body OK</p>"
@pytest.mark.parametrize("line_break", ["\r\n", "\r", "\n"])
def test_dispatch_human_input_email_task_sanitizes_subject(
monkeypatch: pytest.MonkeyPatch,
line_break: str,
):
mail = _DummyMail()
form = SimpleNamespace(id="form-1", tenant_id="tenant-1", workflow_run_id=None)
job = task_module._EmailDeliveryJob(
form_id="form-1",
subject=f"Notice{line_break}BCC:attacker@example.com <b>Alert</b>",
body="Body",
form_content="content",
recipients=[task_module._EmailRecipient(email="user@example.com", token="token-1")],
)
monkeypatch.setattr(task_module, "mail", mail)
monkeypatch.setattr(
task_module.FeatureService,
"get_features",
lambda _tenant_id: SimpleNamespace(human_input_email_delivery_enabled=True),
)
monkeypatch.setattr(task_module, "_load_email_jobs", lambda _session, _form: [job])
monkeypatch.setattr(task_module, "_load_variable_pool", lambda _workflow_run_id: None)
task_module.dispatch_human_input_email_task(
form_id="form-1",
node_title="Approve",
session_factory=lambda: _DummySession(form),
)
assert mail.sent[0]["subject"] == "Notice BCC:attacker@example.com Alert"

View File

@ -658,6 +658,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/cc/38b6f87170908bd8aaf9e412b021d17e85f690abe00edf50192f1a4566b9/billiard-4.2.3-py3-none-any.whl", hash = "sha256:989e9b688e3abf153f307b68a1328dfacfb954e30a4f920005654e276c69236b", size = 87042, upload-time = "2025-11-16T17:47:29.005Z" }, { url = "https://files.pythonhosted.org/packages/b3/cc/38b6f87170908bd8aaf9e412b021d17e85f690abe00edf50192f1a4566b9/billiard-4.2.3-py3-none-any.whl", hash = "sha256:989e9b688e3abf153f307b68a1328dfacfb954e30a4f920005654e276c69236b", size = 87042, upload-time = "2025-11-16T17:47:29.005Z" },
] ]
[[package]]
name = "bleach"
version = "6.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "webencodings" },
]
sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083, upload-time = "2024-10-29T18:30:40.477Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406, upload-time = "2024-10-29T18:30:38.186Z" },
]
[[package]] [[package]]
name = "blinker" name = "blinker"
version = "1.9.0" version = "1.9.0"
@ -1529,6 +1541,7 @@ dependencies = [
{ name = "arize-phoenix-otel" }, { name = "arize-phoenix-otel" },
{ name = "azure-identity" }, { name = "azure-identity" },
{ name = "beautifulsoup4" }, { name = "beautifulsoup4" },
{ name = "bleach" },
{ name = "boto3" }, { name = "boto3" },
{ name = "bs4" }, { name = "bs4" },
{ name = "cachetools" }, { name = "cachetools" },
@ -1730,6 +1743,7 @@ requires-dist = [
{ name = "arize-phoenix-otel", specifier = "~=0.15.0" }, { name = "arize-phoenix-otel", specifier = "~=0.15.0" },
{ name = "azure-identity", specifier = "==1.25.3" }, { name = "azure-identity", specifier = "==1.25.3" },
{ name = "beautifulsoup4", specifier = "==4.14.3" }, { name = "beautifulsoup4", specifier = "==4.14.3" },
{ name = "bleach", specifier = "~=6.2.0" },
{ name = "boto3", specifier = "==1.42.68" }, { name = "boto3", specifier = "==1.42.68" },
{ name = "bs4", specifier = "~=0.0.1" }, { name = "bs4", specifier = "~=0.0.1" },
{ name = "cachetools", specifier = "~=5.3.0" }, { name = "cachetools", specifier = "~=5.3.0" },