Compare commits
190 commits between release/e-... and 1.7.2
Commit SHAs:
0baccb9e82, 2c81db5a1c, 43411d7a9e, 2dbf20a3e9, aaf9fc1562, d30f898274, 4a72fa6268, 0c5e66bccb, ff791efe18, 6083b1d618,
69c3439c3a, 7ee170f0a7, 36b221b170, d1fc98200c, bb852ef6d2, a17b7b3d89, dc65a72d93, ea502d36a9, 79a3c1618f, 0be3b4e7a6,
5f8967918e, 6900b08134, dc641348f6, 431e0105de, cbe0d9d053, f9abcfd789, 5a0a2b7e44, 41345199d8, 8362365eae, 14e1c16cf2,
b32b7712e2, 5cf55fcbab, 57c4fc6bf8, 92fcf0854b, f73ec60311, 1c60b7f070, 084dcd1a50, fd536a943a, 6f80fb72cb, cb5e2ad9b2,
62772e8871, 4b0480c8b3, c8c591d73c, 2edd32fdea, 5889059ce4, 7230497bf4, d98071a088, ac02c12e49, 11d29e8d3e, b44ecf9bf7,
e60f148824, e9045a8838, 55487ba0c6, 305ea0a2d5, a5ca76befb, e01510e2a6, 2931c891a7, ad1b1193fa, 85f33fb73d, f3c98a274b,
f6c7175828, d253ca192a, e072b7dafa, 3ff52f1809, ad61b42494, c95761f4e6, 1f15cba9a0, 3344aaabb6, b8ef0c84e6, 6b8b31ff64,
823872d294, 724ec12bf3, 8aac402b24, eb12fd9461, ad622cea9e, 2575eaf1d6, fc5ed9f316, 40a11b6942, 84543a591a, 2cd3fe0dce,
5eb061466f, 52050d3dff, 904af20023, 4934dbd0e6, d080bea20b, 607dfc8be7, 3b5130b03d, 75f722a959, b946378b38, 0cee57acca,
ab78e12089, 7fe23a0ca6, d8584dc03a, a724f35672, 60c7663a80, 8041808b53, 146d870098, 78d2f49e01, 54c8bd29ee, 406c1952b8,
0ebcee9a6b, 964fa132cb, dedd5f571c, 90373c7165, d470120a60, 0c925bd088, 76d123fe19, 20f0238aab, 6d5a7684b4, 7831d44099,
fbf844efd5, 99a4bd82b5, 58608f51da, ff9fd0cdb2, aac849d4f4, 688d07e9c3, f9b3cd1b68, b2c8718f35, 46ba0a8781, bc18d4d1b9,
a4b14fc992, be914438a5, ec488a4c43, f78b903a49, fd086b06a6, 759ded3e3a, 05b002a8b7, f7016fd922, da5c003f97, c33741a5e9,
872ff3f1d4, 8ab3fda5a8, 1821726d4f, 98aea05ad2, 79ea94483e, a0a30bfdcc, caa5928ac4, 9400832b2b, a82b55005b, a434f6240f,
f5e177db89, 5febd66808, afac1fe590, 4251515b4e, 1b2046da3f, 646900b00c, 142ab74784, ffddabde43, 8c6d87f08a, 270dd955d0,
4e2129d74f, 07cff1ed2c, 070379a900, bbdeb15501, 28478cdc41, 11ec62ca70, 4499cda186, c05c5953a8, eee576355b, a3ef869db6,
a51998e4aa, 0b44edaca9, ab163a5f75, f17ca26b10, 0ea010d7ee, 72a2c3decf, ab7c2cf000, 6914c1c85e, ea542d42ca, cba5bd588c,
00cb1c26a1, f4d4a32af2, 1bf0df03b5, ae28ca0b8d, 51a6b9dc57, 4c65a8091a, 27f400e13f, 7721648867, 47cc951841, 63b6026e6e,
84aa38586f, a70d59d4a6, 57e0a12ccd, f5e1fa4bd2, a7ce1e5789, 5f550126b3, 572a2bbe53, 537c04745d, 7f004e2f41, 7c6415551d
.env.example: 1197 changed lines (new file). File diff suppressed because it is too large.
.github/ISSUE_TEMPLATE/chore.yaml: 44 changed lines (vendored, new file)
@@ -0,0 +1,44 @@
name: "✨ Refactor"
description: Refactor existing code for improved readability and maintainability.
title: "[Chore/Refactor] "
labels:
  - refactor
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
          required: true
        - label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
          required: true
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to submit this report, otherwise it will be closed.
          required: true
        - label: 【中文用户 & Non English User】请使用英语提交,否则会被关闭 :)
          required: true
        - label: "Please do not modify this template :) and fill in all the required fields."
          required: true
  - type: textarea
    id: description
    attributes:
      label: Description
      placeholder: "Describe the refactor you are proposing."
    validations:
      required: true
  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      placeholder: "Explain why this refactor is necessary."
    validations:
      required: false
  - type: textarea
    id: additional-context
    attributes:
      label: Additional Context
      placeholder: "Add any other context or screenshots about the request here."
    validations:
      required: false
.github/workflows/api-tests.yml: 3 changed lines (vendored)
@@ -99,3 +99,6 @@ jobs:

      - name: Run Tool
        run: uv run --project api bash dev/pytest/pytest_tools.sh

      - name: Run TestContainers
        run: uv run --project api bash dev/pytest/pytest_testcontainers.sh
.github/workflows/autofix.yml: 1 changed line (vendored)
@@ -9,6 +9,7 @@ permissions:

jobs:
  autofix:
    if: github.repository == 'langgenius/dify'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
.github/workflows/build-push.yml: 1 changed line (vendored)
@@ -7,6 +7,7 @@ on:
      - "deploy/dev"
      - "deploy/enterprise"
      - "build/**"
      - "release/e-*"
    tags:
      - "*"
@@ -1,13 +1,18 @@
name: Check i18n Files and Create PR

on:
  pull_request:
    types: [closed]
  push:
    branches: [main]
    paths:
      - 'web/i18n/en-US/*.ts'

permissions:
  contents: write
  pull-requests: write

jobs:
  check-and-update:
    if: github.event.pull_request.merged == true
    if: github.repository == 'langgenius/dify'
    runs-on: ubuntu-latest
    defaults:
      run:
@@ -15,8 +20,8 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2 # last 2 commits
          persist-credentials: false
          fetch-depth: 2
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check for file changes in i18n/en-US
        id: check_files
@@ -27,6 +32,13 @@ jobs:
          echo "Changed files: $changed_files"
          if [ -n "$changed_files" ]; then
            echo "FILES_CHANGED=true" >> $GITHUB_ENV
            file_args=""
            for file in $changed_files; do
              filename=$(basename "$file" .ts)
              file_args="$file_args --file=$filename"
            done
            echo "FILE_ARGS=$file_args" >> $GITHUB_ENV
            echo "File arguments: $file_args"
          else
            echo "FILES_CHANGED=false" >> $GITHUB_ENV
          fi
@@ -49,14 +61,15 @@ jobs:
        if: env.FILES_CHANGED == 'true'
        run: pnpm install --frozen-lockfile

      - name: Run npm script
      - name: Generate i18n translations
        if: env.FILES_CHANGED == 'true'
        run: pnpm run auto-gen-i18n
        run: pnpm run auto-gen-i18n ${{ env.FILE_ARGS }}

      - name: Create Pull Request
        if: env.FILES_CHANGED == 'true'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: Update i18n files based on en-US changes
          title: 'chore: translate i18n files'
          body: This PR was automatically created to update i18n files based on changes in en-US locale.
.gitignore: 1 changed line (vendored)
@@ -215,3 +215,4 @@ mise.toml

# AI Assistant
.roo/
api/.env.backup
/clickzetta
@@ -235,13 +235,17 @@ Quickly deploy Dify to Alibaba cloud with [Alibaba Cloud Computing Nest](https:/

One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Deploy to AKS with Azure Devops Pipeline

One-Click deploy Dify to AKS with [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contributing

For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.

> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).

## Community & contact
@@ -217,13 +217,17 @@ docker compose up -d

انشر Dify على علي بابا كلاود بنقرة واحدة باستخدام [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### استخدام Azure Devops Pipeline للنشر على AKS

انشر Dify على AKS بنقرة واحدة باستخدام [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## المساهمة

لأولئك الذين يرغبون في المساهمة، انظر إلى [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) لدينا.
في الوقت نفسه، يرجى النظر في دعم Dify عن طريق مشاركته على وسائل التواصل الاجتماعي وفي الفعاليات والمؤتمرات.

> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).
> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).

**المساهمون**
@@ -235,13 +235,17 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### AKS-এ ডিপ্লয় করার জন্য Azure Devops Pipeline ব্যবহার

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) ব্যবহার করে Dify কে AKS-এ এক ক্লিকে ডিপ্লয় করুন


## Contributing

যারা কোড অবদান রাখতে চান, তাদের জন্য আমাদের [অবদান নির্দেশিকা] দেখুন (https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)।
একই সাথে, সোশ্যাল মিডিয়া এবং ইভেন্ট এবং কনফারেন্সে এটি শেয়ার করে Dify কে সমর্থন করুন।

> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।
> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।

## কমিউনিটি এবং যোগাযোগ
@@ -233,6 +233,9 @@ docker compose up -d

使用 [阿里云数据管理DMS](https://help.aliyun.com/zh/dms/dify-in-invitational-preview) 将 Dify 一键部署到 阿里云

#### 使用 Azure Devops Pipeline 部署到AKS

使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 将 Dify 一键部署到 AKS

## Star History

@@ -244,7 +247,7 @@ docker compose up -d
对于那些想要贡献代码的人,请参阅我们的[贡献指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同时,请考虑通过社交媒体、活动和会议来支持 Dify 的分享。

> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。
> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。

**Contributors**
@@ -230,13 +230,17 @@ Bereitstellung von Dify auf AWS mit [CDK](https://aws.amazon.com/cdk/)

Ein-Klick-Bereitstellung von Dify in der Alibaba Cloud mit [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Verwendung von Azure Devops Pipeline für AKS-Bereitstellung

Stellen Sie Dify mit einem Klick in AKS bereit, indem Sie [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) verwenden


## Contributing

Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Gleichzeitig bitten wir Sie, Dify zu unterstützen, indem Sie es in den sozialen Medien teilen und auf Veranstaltungen und Konferenzen präsentieren.

> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).
> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).

## Gemeinschaft & Kontakt
@@ -230,6 +230,10 @@ Despliegue Dify en AWS usando [CDK](https://aws.amazon.com/cdk/)

Despliega Dify en Alibaba Cloud con un solo clic con [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Uso de Azure Devops Pipeline para implementar en AKS

Implementa Dify en AKS con un clic usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuir

@@ -237,7 +241,7 @@ Para aquellos que deseen contribuir con código, consulten nuestra [Guía de con
Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en eventos y conferencias.


> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).

**Contribuidores**
@@ -228,6 +228,10 @@ Déployez Dify sur AWS en utilisant [CDK](https://aws.amazon.com/cdk/)

Déployez Dify en un clic sur Alibaba Cloud avec [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Utilisation d'Azure Devops Pipeline pour déployer sur AKS

Déployez Dify sur AKS en un clic en utilisant [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuer

@@ -235,7 +239,7 @@ Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribut
Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur les réseaux sociaux et lors d'événements et de conférences.


> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).
> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).

**Contributeurs**
@@ -227,6 +227,10 @@ docker compose up -d
#### Alibaba Cloud Data Management
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) を利用して、DifyをAlibaba Cloudへワンクリックでデプロイできます

#### AKSへのデプロイにAzure Devops Pipelineを使用

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)を使用してDifyをAKSにワンクリックでデプロイ


## 貢献

@@ -234,7 +238,7 @@ docker compose up -d
同時に、DifyをSNSやイベント、カンファレンスで共有してサポートしていただけると幸いです。


> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。
> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。

**貢献者**
@@ -228,6 +228,10 @@ wa'logh nIqHom neH ghun deployment toy'wI' [CDK](https://aws.amazon.com/cdk/) lo

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### AKS 'e' Deploy je Azure Devops Pipeline lo'laH

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) lo'laH Dify AKS 'e' wa'DIch click 'e' Deploy


## Contributing

@@ -235,7 +239,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.


> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).

**Contributors**
@@ -222,6 +222,10 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)를 통해 원클릭으로 Dify를 Alibaba Cloud에 배포할 수 있습니다

#### AKS에 배포하기 위해 Azure Devops Pipeline 사용

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)을 사용하여 Dify를 AKS에 원클릭으로 배포


## 기여

@@ -229,7 +233,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
동시에 Dify를 소셜 미디어와 행사 및 컨퍼런스에 공유하여 지원하는 것을 고려해 주시기 바랍니다.


> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.
> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.

**기여자**
@@ -227,13 +227,17 @@ Implante o Dify na AWS usando [CDK](https://aws.amazon.com/cdk/)

Implante o Dify na Alibaba Cloud com um clique usando o [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Usando Azure Devops Pipeline para Implantar no AKS

Implante o Dify no AKS com um clique usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuindo

Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências.

> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).

**Contribuidores**
@@ -228,6 +228,10 @@ Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/)

Z enim klikom namestite Dify na Alibaba Cloud z [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Uporaba Azure Devops Pipeline za uvajanje v AKS

Z enim klikom namestite Dify v AKS z uporabo [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Prispevam
@@ -221,13 +221,17 @@ Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.ter

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) kullanarak Dify'ı tek tıkla Alibaba Cloud'a dağıtın

#### AKS'ye Dağıtım için Azure Devops Pipeline Kullanımı

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) kullanarak Dify'ı tek tıkla AKS'ye dağıtın


## Katkıda Bulunma

Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz.
Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda paylaşarak desteklemeyi düşünün.

> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.
> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.

**Katkıda Bulunanlar**
@@ -233,13 +233,17 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify

透過 [阿里雲數據管理DMS](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/),一鍵將 Dify 部署至阿里雲

#### 使用 Azure Devops Pipeline 部署到AKS

使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 將 Dify 一鍵部署到 AKS


## 貢獻

對於想要貢獻程式碼的開發者,請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同時,也請考慮透過在社群媒體和各種活動與會議上分享 Dify 來支持我們。

> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。
> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。

## 社群與聯絡方式
@@ -224,6 +224,10 @@ Triển khai Dify trên AWS bằng [CDK](https://aws.amazon.com/cdk/)

Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Sử dụng Azure Devops Pipeline để Triển khai lên AKS

Triển khai Dify lên AKS chỉ với một cú nhấp chuột bằng [Azure Devops Pipeline Helm Chart bởi @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Đóng góp

@@ -231,7 +235,7 @@ Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng
Đồng thời, vui lòng xem xét hỗ trợ Dify bằng cách chia sẻ nó trên mạng xã hội và tại các sự kiện và hội nghị.


> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.
> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.

**Người đóng góp**
@@ -232,6 +232,7 @@ TABLESTORE_ENDPOINT=https://instance-name.cn-hangzhou.ots.aliyuncs.com
TABLESTORE_INSTANCE_NAME=instance-name
TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false

# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
@@ -19,7 +19,7 @@ RUN apt-get update \

# Install Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync --locked
RUN uv sync --locked --no-dev

# production stage
FROM base AS production
@@ -5,10 +5,11 @@ import secrets
from typing import Any, Optional

import click
import sqlalchemy as sa
from flask import current_app
from pydantic import TypeAdapter
from sqlalchemy import select
from werkzeug.exceptions import NotFound
from sqlalchemy.exc import SQLAlchemyError

from configs import dify_config
from constants.languages import languages

@@ -180,8 +181,8 @@ def migrate_annotation_vector_database():
)
if not apps:
break
except NotFound:
break
except SQLAlchemyError:
raise

page += 1
for app in apps:

@@ -307,8 +308,8 @@ def migrate_knowledge_vector_database():
)

datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise

page += 1
for dataset in datasets:

@@ -457,7 +458,7 @@ def convert_to_agent_apps():
"""

with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query))
rs = conn.execute(sa.text(sql_query))

apps = []
for i in rs:

@@ -560,8 +561,8 @@ def old_metadata_migration():
.order_by(DatasetDocument.created_at.desc())
)
documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise
if not documents:
break
for document in documents:

@@ -702,7 +703,7 @@ def fix_app_site_missing():
sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id
where sites.id is null limit 1000"""
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql))
rs = conn.execute(sa.text(sql))

processed_count = 0
for i in rs:

@@ -916,7 +917,7 @@ def clear_orphaned_file_records(force: bool):
)
orphaned_message_files = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])})

@@ -937,7 +938,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
query = "DELETE FROM message_files WHERE id IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
conn.execute(sa.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
click.echo(
click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
)

@@ -954,7 +955,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]})
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))

@@ -974,7 +975,7 @@ def clear_orphaned_file_records(force: bool):
f"SELECT {ids_table['column']} FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])})
elif ids_table["type"] == "text":

@@ -989,7 +990,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})

@@ -1008,7 +1009,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})

@@ -1037,7 +1038,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white"))
query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple(orphaned_files)})
conn.execute(sa.text(query), {"ids": tuple(orphaned_files)})
except Exception as e:
click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red"))
return

@@ -1107,7 +1108,7 @@ def remove_orphaned_files_on_storage(force: bool):
click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append(str(i[0]))
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
@@ -330,17 +330,17 @@ class HttpConfig(BaseSettings):
def WEB_API_CORS_ALLOW_ORIGINS(self) -> list[str]:
return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",")

HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum connection timeout in seconds for HTTP requests")
] = 10
HTTP_REQUEST_MAX_CONNECT_TIMEOUT: int = Field(
ge=1, description="Maximum connection timeout in seconds for HTTP requests", default=10
)

HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[
PositiveInt, Field(ge=60, description="Maximum read timeout in seconds for HTTP requests")
] = 60
HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
ge=1, description="Maximum read timeout in seconds for HTTP requests", default=60
)

HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum write timeout in seconds for HTTP requests")
] = 20
HTTP_REQUEST_MAX_WRITE_TIMEOUT: int = Field(
ge=1, description="Maximum write timeout in seconds for HTTP requests", default=20
)

HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
description="Maximum allowed size in bytes for binary data in HTTP requests",
@@ -10,6 +10,7 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig

@@ -20,6 +21,7 @@ from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig

@@ -52,6 +54,7 @@ class StorageConfig(BaseSettings):
"aliyun-oss",
"azure-blob",
"baidu-obs",
"clickzetta-volume",
"google-storage",
"huawei-obs",
"oci-storage",

@@ -61,8 +64,9 @@
"local",
] = Field(
description="Type of storage to use."
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)

@@ -140,7 +144,8 @@ class DatabaseConfig(BaseSettings):
default="postgresql",
)

@computed_field
@computed_field # type: ignore[misc]
@property
def SQLALCHEMY_DATABASE_URI(self) -> str:
db_extras = (
f"{self.DB_EXTRAS}&client_encoding={self.DB_CHARSET}" if self.DB_CHARSET else self.DB_EXTRAS

@@ -215,7 +220,7 @@ class DatabaseConfig(BaseSettings):

class CeleryConfig(DatabaseConfig):
CELERY_BACKEND: str = Field(
description="Backend for Celery task results. Options: 'database', 'redis'.",
description="Backend for Celery task results. Options: 'database', 'redis', 'rabbitmq'.",
default="redis",
)

@@ -245,7 +250,12 @@ class CeleryConfig(DatabaseConfig):

@computed_field
def CELERY_RESULT_BACKEND(self) -> str | None:
return f"db+{self.SQLALCHEMY_DATABASE_URI}" if self.CELERY_BACKEND == "database" else self.CELERY_BROKER_URL
if self.CELERY_BACKEND in ("database", "rabbitmq"):
return f"db+{self.SQLALCHEMY_DATABASE_URI}"
elif self.CELERY_BACKEND == "redis":
return self.CELERY_BROKER_URL
else:
return None

@property
def BROKER_USE_SSL(self) -> bool:

@@ -298,6 +308,7 @@ class MiddlewareConfig(
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
ClickZettaVolumeStorageConfig,
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,

@@ -310,6 +321,7 @@ class MiddlewareConfig(
VectorStoreConfig,
AnalyticdbConfig,
ChromaConfig,
ClickzettaConfig,
HuaweiCloudConfig,
MilvusConfig,
MyScaleConfig,
@@ -0,0 +1,65 @@
"""ClickZetta Volume Storage Configuration"""

from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings


class ClickZettaVolumeStorageConfig(BaseSettings):
    """Configuration for ClickZetta Volume storage."""

    CLICKZETTA_VOLUME_USERNAME: Optional[str] = Field(
        description="Username for ClickZetta Volume authentication",
        default=None,
    )

    CLICKZETTA_VOLUME_PASSWORD: Optional[str] = Field(
        description="Password for ClickZetta Volume authentication",
        default=None,
    )

    CLICKZETTA_VOLUME_INSTANCE: Optional[str] = Field(
        description="ClickZetta instance identifier",
        default=None,
    )

    CLICKZETTA_VOLUME_SERVICE: str = Field(
        description="ClickZetta service endpoint",
        default="api.clickzetta.com",
    )

    CLICKZETTA_VOLUME_WORKSPACE: str = Field(
        description="ClickZetta workspace name",
        default="quick_start",
    )

    CLICKZETTA_VOLUME_VCLUSTER: str = Field(
        description="ClickZetta virtual cluster name",
        default="default_ap",
    )

    CLICKZETTA_VOLUME_SCHEMA: str = Field(
        description="ClickZetta schema name",
        default="dify",
    )

    CLICKZETTA_VOLUME_TYPE: str = Field(
        description="ClickZetta volume type (table|user|external)",
        default="user",
    )

    CLICKZETTA_VOLUME_NAME: Optional[str] = Field(
        description="ClickZetta volume name for external volumes",
        default=None,
    )

    CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
        description="Prefix for ClickZetta volume table names",
        default="dataset_",
    )

    CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
        description="Directory prefix for User Volume to organize Dify files",
        default="dify_km",
    )
api/configs/middleware/vdb/clickzetta_config.py: 69 changed lines (new file)
@@ -0,0 +1,69 @@
from typing import Optional

from pydantic import BaseModel, Field


class ClickzettaConfig(BaseModel):
    """
    Clickzetta Lakehouse vector database configuration
    """

    CLICKZETTA_USERNAME: Optional[str] = Field(
        description="Username for authenticating with Clickzetta Lakehouse",
        default=None,
    )

    CLICKZETTA_PASSWORD: Optional[str] = Field(
        description="Password for authenticating with Clickzetta Lakehouse",
        default=None,
    )

    CLICKZETTA_INSTANCE: Optional[str] = Field(
        description="Clickzetta Lakehouse instance ID",
        default=None,
    )

    CLICKZETTA_SERVICE: Optional[str] = Field(
        description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
        default="api.clickzetta.com",
    )

    CLICKZETTA_WORKSPACE: Optional[str] = Field(
        description="Clickzetta workspace name",
        default="default",
    )

    CLICKZETTA_VCLUSTER: Optional[str] = Field(
        description="Clickzetta virtual cluster name",
        default="default_ap",
    )

    CLICKZETTA_SCHEMA: Optional[str] = Field(
        description="Database schema name in Clickzetta",
        default="public",
    )

    CLICKZETTA_BATCH_SIZE: Optional[int] = Field(
        description="Batch size for bulk insert operations",
        default=100,
    )

    CLICKZETTA_ENABLE_INVERTED_INDEX: Optional[bool] = Field(
        description="Enable inverted index for full-text search capabilities",
        default=True,
    )

    CLICKZETTA_ANALYZER_TYPE: Optional[str] = Field(
        description="Analyzer type for full-text search: keyword, english, chinese, unicode",
        default="chinese",
    )

    CLICKZETTA_ANALYZER_MODE: Optional[str] = Field(
        description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
        default="smart",
    )

    CLICKZETTA_VECTOR_DISTANCE_FUNCTION: Optional[str] = Field(
        description="Distance function for vector similarity: l2_distance or cosine_distance",
        default="cosine_distance",
    )
@@ -1,12 +1,13 @@
from typing import Optional

from pydantic import Field, PositiveInt
from pydantic import Field, PositiveInt, model_validator
from pydantic_settings import BaseSettings


class ElasticsearchConfig(BaseSettings):
    """
    Configuration settings for Elasticsearch
    Configuration settings for both self-managed and Elastic Cloud deployments.
    Can load from environment variables or .env files.
    """

    ELASTICSEARCH_HOST: Optional[str] = Field(
@@ -28,3 +29,50 @@ class ElasticsearchConfig(BaseSettings):
        description="Password for authenticating with Elasticsearch (default is 'elastic')",
        default="elastic",
    )

    # Elastic Cloud (optional)
    ELASTICSEARCH_USE_CLOUD: Optional[bool] = Field(
        description="Set to True to use Elastic Cloud instead of self-hosted Elasticsearch", default=False
    )
    ELASTICSEARCH_CLOUD_URL: Optional[str] = Field(
        description="Full URL for Elastic Cloud deployment (e.g., 'https://example.es.region.aws.found.io:443')",
        default=None,
    )
    ELASTICSEARCH_API_KEY: Optional[str] = Field(
        description="API key for authenticating with Elastic Cloud", default=None
    )

    # Common options
    ELASTICSEARCH_CA_CERTS: Optional[str] = Field(
        description="Path to CA certificate file for SSL verification", default=None
    )
    ELASTICSEARCH_VERIFY_CERTS: bool = Field(
        description="Whether to verify SSL certificates (default is False)", default=False
    )
    ELASTICSEARCH_REQUEST_TIMEOUT: int = Field(
        description="Request timeout in milliseconds (default is 100000)", default=100000
    )
    ELASTICSEARCH_RETRY_ON_TIMEOUT: bool = Field(
        description="Whether to retry requests on timeout (default is True)", default=True
    )
    ELASTICSEARCH_MAX_RETRIES: int = Field(
        description="Maximum number of retry attempts (default is 10000)", default=10000
    )

    @model_validator(mode="after")
    def validate_elasticsearch_config(self):
        """Validate Elasticsearch configuration based on deployment type."""
        if self.ELASTICSEARCH_USE_CLOUD:
            if not self.ELASTICSEARCH_CLOUD_URL:
                raise ValueError("ELASTICSEARCH_CLOUD_URL is required when using Elastic Cloud")
            if not self.ELASTICSEARCH_API_KEY:
                raise ValueError("ELASTICSEARCH_API_KEY is required when using Elastic Cloud")
        else:
            if not self.ELASTICSEARCH_HOST:
                raise ValueError("ELASTICSEARCH_HOST is required for self-hosted Elasticsearch")
            if not self.ELASTICSEARCH_USERNAME:
                raise ValueError("ELASTICSEARCH_USERNAME is required for self-hosted Elasticsearch")
            if not self.ELASTICSEARCH_PASSWORD:
                raise ValueError("ELASTICSEARCH_PASSWORD is required for self-hosted Elasticsearch")

        return self
@@ -28,3 +28,8 @@ class TableStoreConfig(BaseSettings):
        description="AccessKey secret for the instance name",
        default=None,
    )

    TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: bool = Field(
        description="Whether to normalize full-text search scores to [0, 1]",
        default=False,
    )
@@ -9,10 +9,10 @@ DEFAULT_FILE_NUMBER_LIMITS = 3
IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "webp", "gif", "svg"]
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])

VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "mpga"]
VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "webm"]
VIDEO_EXTENSIONS.extend([ext.upper() for ext in VIDEO_EXTENSIONS])

AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "webm", "amr"]
AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "amr", "mpga"]
AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
@@ -84,6 +84,7 @@ from .datasets import (
    external,
    hit_testing,
    metadata,
    upload_file,
    website,
)
@@ -100,7 +100,7 @@ class AnnotationReplyActionStatusApi(Resource):
return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200


class AnnotationListApi(Resource):
class AnnotationApi(Resource):
@setup_required
@login_required
@account_initialization_required

@@ -123,6 +123,23 @@ class AnnotationListApi(Resource):
}
return response, 200

@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()

app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation

@setup_required
@login_required
@account_initialization_required

@@ -131,8 +148,25 @@ class AnnotationListApi(Resource):
raise Forbidden()

app_id = str(app_id)
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204

# Use request.args.getlist to get annotation_ids array directly
annotation_ids = request.args.getlist("annotation_id")

# If annotation_ids are provided, handle batch deletion
if annotation_ids:
# Check if any annotation_ids contain empty strings or invalid values
if not all(annotation_id.strip() for annotation_id in annotation_ids if annotation_id):
return {
"code": "bad_request",
"message": "annotation_ids are required if the parameter is provided.",
}, 400

result = AppAnnotationService.delete_app_annotations_in_batch(app_id, annotation_ids)
return result, 204
# If no annotation_ids are provided, handle clearing all annotations
else:
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204


class AnnotationExportApi(Resource):

@@ -149,25 +183,6 @@ class AnnotationExportApi(Resource):
return response, 200


class AnnotationCreateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()

app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation


class AnnotationUpdateDeleteApi(Resource):
@setup_required
@login_required

@@ -210,14 +225,15 @@ class AnnotationBatchImportApi(Resource):
raise Forbidden()

app_id = str(app_id)
# get file from request
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()

if len(request.files) > 1:
raise TooManyFilesError()

# get file from request
file = request.files["file"]
# check file type
if not file.filename or not file.filename.lower().endswith(".csv"):
raise ValueError("Invalid file type. Only CSV files are allowed")

@@ -276,7 +292,7 @@ api.add_resource(AnnotationReplyActionApi, "/apps/<uuid:app_id>/annotation-reply
api.add_resource(
AnnotationReplyActionStatusApi, "/apps/<uuid:app_id>/annotation-reply/<string:action>/status/<uuid:job_id>"
)
api.add_resource(AnnotationListApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationExportApi, "/apps/<uuid:app_id>/annotations/export")
api.add_resource(AnnotationUpdateDeleteApi, "/apps/<uuid:app_id>/annotations/<uuid:annotation_id>")
api.add_resource(AnnotationBatchImportApi, "/apps/<uuid:app_id>/annotations/batch-import")
@@ -28,6 +28,12 @@ from services.feature_service import FeatureService
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]


def _validate_description_length(description):
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description


class AppListApi(Resource):
@setup_required
@login_required

@@ -94,7 +100,7 @@ class AppListApi(Resource):
"""Create app"""
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("mode", type=str, choices=ALLOW_CREATE_APP_MODES, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")

@@ -146,7 +152,7 @@ class AppApi(Resource):

parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")

@@ -189,7 +195,7 @@ class AppCopyApi(Resource):

parser = reqparse.RequestParser()
parser.add_argument("name", type=str, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")
@@ -5,7 +5,6 @@ from flask_restful import Resource, fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.app.error import (
|
||||
CompletionRequestError,
|
||||
@@ -133,7 +132,7 @@ class MessageFeedbackApi(Resource):
|
||||
rating=args.get("rating"),
|
||||
content=None,
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -67,7 +67,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "message_count": i.message_count})
|
||||
|
||||
@@ -176,7 +176,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
|
||||
|
||||
@@ -234,7 +234,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "token_count": i.token_count, "total_price": i.total_price, "currency": "USD"}
|
||||
@@ -310,7 +310,7 @@ ORDER BY
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
|
||||
@@ -373,7 +373,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{
|
||||
@@ -435,7 +435,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "latency": round(i.latency * 1000, 4)})
|
||||
|
||||
@@ -495,7 +495,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "tps": round(i.tokens_per_second, 4)})
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import pytz
|
||||
import sqlalchemy as sa
|
||||
from flask import jsonify
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource, reqparse
|
||||
@@ -71,7 +72,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "runs": i.runs})
|
||||
|
||||
@@ -133,7 +134,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
|
||||
|
||||
@@ -195,7 +196,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{
|
||||
@@ -277,7 +278,7 @@ GROUP BY
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
|
||||
|
||||
@@ -113,9 +113,3 @@ class MemberNotInTenantError(BaseHTTPException):
|
||||
error_code = "member_not_in_tenant"
|
||||
description = "The member is not in the workspace."
|
||||
code = 400
class AccountInFreezeError(BaseHTTPException):
|
||||
error_code = "account_in_freeze"
|
||||
description = "This email is temporarily unavailable."
|
||||
code = 400
|
||||
|
||||
@@ -41,7 +41,7 @@ def _validate_name(name):
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 400:
|
||||
if description and len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
@@ -113,7 +113,7 @@ class DatasetListApi(Resource):
|
||||
)
|
||||
parser.add_argument(
|
||||
"description",
|
||||
type=str,
|
||||
type=_validate_description_length,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="",
|
||||
@@ -683,6 +683,7 @@ class DatasetRetrievalSettingApi(Resource):
|
||||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.TENCENT
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
@@ -731,6 +732,7 @@ class DatasetRetrievalSettingMockApi(Resource):
|
||||
| VectorType.TENCENT
|
||||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
|
||||
@@ -642,7 +642,7 @@ class DocumentIndexingStatusApi(DocumentResource):
|
||||
return marshal(document_dict, document_status_fields)
|
||||
|
||||
|
||||
class DocumentDetailApi(DocumentResource):
|
||||
class DocumentApi(DocumentResource):
|
||||
METADATA_CHOICES = {"all", "only", "without"}
|
||||
|
||||
@setup_required
|
||||
@@ -730,6 +730,28 @@ class DocumentDetailApi(DocumentResource):
|
||||
|
||||
return response, 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
def delete(self, dataset_id, document_id):
|
||||
dataset_id = str(dataset_id)
|
||||
document_id = str(document_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check user's model setting
|
||||
DatasetService.check_dataset_model_setting(dataset)
|
||||
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
try:
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return {"result": "success"}, 204
|
||||
|
||||
|
||||
class DocumentProcessingApi(DocumentResource):
|
||||
@setup_required
|
||||
@@ -768,30 +790,6 @@ class DocumentProcessingApi(DocumentResource):
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
class DocumentDeleteApi(DocumentResource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
def delete(self, dataset_id, document_id):
|
||||
dataset_id = str(dataset_id)
|
||||
document_id = str(document_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check user's model setting
|
||||
DatasetService.check_dataset_model_setting(dataset)
|
||||
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
try:
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return {"result": "success"}, 204
|
||||
|
||||
|
||||
class DocumentMetadataApi(DocumentResource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@@ -1037,11 +1035,10 @@ api.add_resource(
|
||||
api.add_resource(DocumentBatchIndexingEstimateApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-estimate")
|
||||
api.add_resource(DocumentBatchIndexingStatusApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
|
||||
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
|
||||
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(
|
||||
DocumentProcessingApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>"
|
||||
)
|
||||
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentMetadataApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata")
|
||||
api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>/batch")
|
||||
api.add_resource(DocumentPauseApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause")
|
||||
|
||||
@@ -22,8 +22,8 @@ class DatasetMetadataCreateApi(Resource):
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def post(self, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataArgs(**args)
|
||||
|
||||
@@ -56,7 +56,7 @@ class DatasetMetadataApi(Resource):
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def patch(self, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
@@ -127,7 +127,7 @@ class DocumentMetadataEditApi(Resource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataOperationData(**args)
|
||||
|
||||
|
||||
api/controllers/console/datasets/upload_file.py (new file, 62 lines)
@@ -0,0 +1,62 @@
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
setup_required,
|
||||
)
|
||||
from core.file import helpers as file_helpers
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService
|
||||
|
||||
|
||||
class UploadFileApi(Resource):
|
||||
@setup_required
|
||||
@account_initialization_required
|
||||
def get(self, dataset_id, document_id):
|
||||
"""Get upload file."""
|
||||
# check dataset
|
||||
dataset_id = str(dataset_id)
|
||||
dataset = (
|
||||
db.session.query(Dataset)
|
||||
.filter(Dataset.tenant_id == current_user.current_tenant_id, Dataset.id == dataset_id)
|
||||
.first()
|
||||
)
|
||||
if not dataset:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check document
|
||||
document_id = str(document_id)
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
if not document:
|
||||
raise NotFound("Document not found.")
|
||||
# check upload file
|
||||
if document.data_source_type != "upload_file":
|
||||
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
|
||||
data_source_info = document.data_source_info_dict
|
||||
if data_source_info and "upload_file_id" in data_source_info:
|
||||
file_id = data_source_info["upload_file_id"]
|
||||
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
||||
if not upload_file:
|
||||
raise NotFound("UploadFile not found.")
|
||||
else:
|
||||
raise ValueError("Upload file id not found in document data source info.")
|
||||
|
||||
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
|
||||
return {
|
||||
"id": upload_file.id,
|
||||
"name": upload_file.name,
|
||||
"size": upload_file.size,
|
||||
"extension": upload_file.extension,
|
||||
"url": url,
|
||||
"download_url": f"{url}&as_attachment=true",
|
||||
"mime_type": upload_file.mime_type,
|
||||
"created_by": upload_file.created_by,
|
||||
"created_at": upload_file.created_at.timestamp(),
|
||||
}, 200
api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")
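A hedged client-side sketch of calling the new route; the /console/api prefix, host, and bearer token are assumptions about the deployment, not part of this change:

import requests

resp = requests.get(
    "https://example.com/console/api/datasets/<dataset_id>/documents/<document_id>/upload-file",
    headers={"Authorization": "Bearer <console-access-token>"},
)
info = resp.json()
print(info["name"], info["size"], info["download_url"])  # download_url appends &as_attachment=true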
|
||||
@@ -127,7 +127,7 @@ class EducationActivateLimitError(BaseHTTPException):
|
||||
code = 429
|
||||
|
||||
|
||||
class CompilanceRateLimitError(BaseHTTPException):
|
||||
error_code = "compilance_rate_limit"
|
||||
class ComplianceRateLimitError(BaseHTTPException):
|
||||
error_code = "compliance_rate_limit"
|
||||
description = "Rate limit exceeded for downloading compliance report."
|
||||
code = 429
|
||||
|
||||
@@ -58,21 +58,38 @@ class InstalledAppsListApi(Resource):
|
||||
# filter out apps that user doesn't have access to
|
||||
if FeatureService.get_system_features().webapp_auth.enabled:
|
||||
user_id = current_user.id
|
||||
res = []
|
||||
app_ids = [installed_app["app"].id for installed_app in installed_app_list]
|
||||
webapp_settings = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids)
|
||||
|
||||
# Pre-filter out apps without setting or with sso_verified
|
||||
filtered_installed_apps = []
|
||||
app_id_to_app_code = {}
|
||||
|
||||
for installed_app in installed_app_list:
|
||||
webapp_setting = webapp_settings.get(installed_app["app"].id)
|
||||
if not webapp_setting:
|
||||
app_id = installed_app["app"].id
|
||||
webapp_setting = webapp_settings.get(app_id)
|
||||
if not webapp_setting or webapp_setting.access_mode == "sso_verified":
|
||||
continue
|
||||
if webapp_setting.access_mode == "sso_verified":
|
||||
continue
|
||||
app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
|
||||
if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
|
||||
user_id=user_id,
|
||||
app_code=app_code,
|
||||
):
|
||||
app_code = AppService.get_app_code_by_id(str(app_id))
|
||||
app_id_to_app_code[app_id] = app_code
|
||||
filtered_installed_apps.append(installed_app)
|
||||
|
||||
app_codes = list(app_id_to_app_code.values())
|
||||
|
||||
# Batch permission check
|
||||
permissions = EnterpriseService.WebAppAuth.batch_is_user_allowed_to_access_webapps(
|
||||
user_id=user_id,
|
||||
app_codes=app_codes,
|
||||
)
|
||||
|
||||
# Keep only allowed apps
|
||||
res = []
|
||||
for installed_app in filtered_installed_apps:
|
||||
app_id = installed_app["app"].id
|
||||
app_code = app_id_to_app_code[app_id]
|
||||
if permissions.get(app_code):
|
||||
res.append(installed_app)
|
||||
|
||||
installed_app_list = res
|
||||
logger.debug("installed_app_list: %s, user_id: %s", installed_app_list, user_id)
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ from flask_restful import marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console.app.error import (
|
||||
AppMoreLikeThisDisabledError,
|
||||
CompletionRequestError,
|
||||
@@ -29,7 +28,11 @@ from models.model import AppMode
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -52,9 +55,9 @@ class MessageListApi(InstalledAppResource):
|
||||
return MessageService.pagination_by_first_id(
|
||||
app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
except ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -77,7 +80,7 @@ class MessageFeedbackApi(InstalledAppResource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -49,7 +49,6 @@ class FileApi(Resource):
|
||||
@marshal_with(file_fields)
|
||||
@cloud_edition_billing_resource_check("documents")
|
||||
def post(self):
|
||||
file = request.files["file"]
|
||||
source_str = request.form.get("source")
|
||||
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None
|
||||
|
||||
@@ -58,6 +57,7 @@ class FileApi(Resource):
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
file = request.files["file"]
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
@@ -9,14 +9,13 @@ from configs import dify_config
|
||||
from constants.languages import supported_language
|
||||
from controllers.console import api
|
||||
from controllers.console.auth.error import (
|
||||
AccountInFreezeError,
|
||||
EmailAlreadyInUseError,
|
||||
EmailChangeLimitError,
|
||||
EmailCodeError,
|
||||
InvalidEmailError,
|
||||
InvalidTokenError,
|
||||
)
|
||||
from controllers.console.error import AccountNotFound, EmailSendIpLimitError
|
||||
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
|
||||
from controllers.console.workspace.error import (
|
||||
AccountAlreadyInitedError,
|
||||
CurrentPasswordIncorrectError,
|
||||
|
||||
@@ -191,9 +191,6 @@ class WebappLogoWorkspaceApi(Resource):
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("workspace_custom")
|
||||
def post(self):
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
@@ -201,6 +198,8 @@ class WebappLogoWorkspaceApi(Resource):
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
||||
@@ -6,6 +6,6 @@ bp = Blueprint("service_api", __name__, url_prefix="/v1")
|
||||
api = ExternalApi(bp)
|
||||
|
||||
from . import index
|
||||
from .app import annotation, app, audio, completion, conversation, file, message, site, workflow
|
||||
from .app import annotation, app, audio, completion, conversation, file, file_preview, message, site, workflow
|
||||
from .dataset import dataset, document, hit_testing, metadata, segment, upload_file
|
||||
from .workspace import models
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
|
||||
from flask import request
|
||||
from flask_restful import Resource, reqparse
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
@@ -30,6 +30,7 @@ from libs import helper
|
||||
from libs.helper import uuid_value
|
||||
from models.model import App, AppMode, EndUser
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
|
||||
from services.errors.llm import InvokeRateLimitError
|
||||
|
||||
|
||||
@@ -47,6 +48,9 @@ class CompletionApi(Resource):
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
|
||||
|
||||
args = parser.parse_args()
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
if external_trace_id:
|
||||
args["external_trace_id"] = external_trace_id
|
||||
|
||||
streaming = args["response_mode"] == "streaming"
|
||||
|
||||
@@ -110,7 +114,7 @@ class ChatApi(Resource):
|
||||
parser.add_argument("conversation_id", type=uuid_value, location="json")
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
|
||||
parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json")
|
||||
|
||||
parser.add_argument("workflow_id", type=str, required=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
@@ -125,6 +129,12 @@ class ChatApi(Resource):
|
||||
)
|
||||
|
||||
return helper.compact_generate_response(response)
|
||||
except WorkflowNotFoundError as ex:
|
||||
raise NotFound(str(ex))
|
||||
except IsDraftWorkflowError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except WorkflowIdFormatError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.conversation.ConversationCompletedError:
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import json
|
||||
|
||||
from flask_restful import Resource, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from sqlalchemy.orm import Session
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
@@ -15,6 +17,7 @@ from fields.conversation_fields import (
|
||||
simple_conversation_fields,
|
||||
)
|
||||
from fields.conversation_variable_fields import (
|
||||
conversation_variable_fields,
|
||||
conversation_variable_infinite_scroll_pagination_fields,
|
||||
)
|
||||
from libs.helper import uuid_value
|
||||
@@ -120,7 +123,41 @@ class ConversationVariablesApi(Resource):
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
|
||||
|
||||
class ConversationVariableDetailApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
|
||||
@marshal_with(conversation_variable_fields)
|
||||
def put(self, app_model: App, end_user: EndUser, c_id, variable_id):
|
||||
"""Update a conversation variable's value"""
|
||||
app_mode = AppMode.value_of(app_model.mode)
|
||||
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
|
||||
raise NotChatAppError()
|
||||
|
||||
conversation_id = str(c_id)
|
||||
variable_id = str(variable_id)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("value", required=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
return ConversationService.update_conversation_variable(
|
||||
app_model, conversation_id, variable_id, end_user, json.loads(args["value"])
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.conversation.ConversationVariableNotExistsError:
|
||||
raise NotFound("Conversation Variable Not Exists.")
|
||||
except services.errors.conversation.ConversationVariableTypeMismatchError as e:
|
||||
raise BadRequest(str(e))
|
||||
|
||||
|
||||
api.add_resource(ConversationRenameApi, "/conversations/<uuid:c_id>/name", endpoint="conversation_name")
|
||||
api.add_resource(ConversationApi, "/conversations")
|
||||
api.add_resource(ConversationDetailApi, "/conversations/<uuid:c_id>", endpoint="conversation_detail")
|
||||
api.add_resource(ConversationVariablesApi, "/conversations/<uuid:c_id>/variables", endpoint="conversation_variables")
|
||||
api.add_resource(
|
||||
ConversationVariableDetailApi,
|
||||
"/conversations/<uuid:c_id>/variables/<uuid:variable_id>",
|
||||
endpoint="conversation_variable_detail",
|
||||
methods=["PUT"],
|
||||
)
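A usage sketch for the new PUT route (host, API key, and ids are placeholders). The handler runs json.loads on the submitted value, so the value field is itself a JSON-encoded string; the user field is how validate_app_token resolves the end user from the JSON body:

import requests

resp = requests.put(
    "https://example.com/v1/conversations/<conversation_id>/variables/<variable_id>",
    headers={"Authorization": "Bearer <service-api-key>"},
    json={"value": "42", "user": "end-user-123"},  # json.loads("42") -> 42
)
print(resp.status_code, resp.json())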
|
||||
|
||||
@@ -107,3 +107,15 @@ class UnsupportedFileTypeError(BaseHTTPException):
|
||||
error_code = "unsupported_file_type"
|
||||
description = "File type not allowed."
|
||||
code = 415
|
||||
|
||||
|
||||
class FileNotFoundError(BaseHTTPException):
|
||||
error_code = "file_not_found"
|
||||
description = "The requested file was not found."
|
||||
code = 404
|
||||
|
||||
|
||||
class FileAccessDeniedError(BaseHTTPException):
|
||||
error_code = "file_access_denied"
|
||||
description = "Access to the requested file is denied."
|
||||
code = 403
|
||||
|
||||
@@ -20,18 +20,17 @@ class FileApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model: App, end_user: EndUser):
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
||||
api/controllers/service_api/app/file_preview.py (new file, 186 lines)
@@ -0,0 +1,186 @@
|
||||
import logging
|
||||
from urllib.parse import quote
|
||||
|
||||
from flask import Response
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import (
|
||||
FileAccessDeniedError,
|
||||
FileNotFoundError,
|
||||
)
|
||||
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from models.model import App, EndUser, Message, MessageFile, UploadFile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FilePreviewApi(Resource):
|
||||
"""
|
||||
Service API File Preview endpoint
|
||||
|
||||
Provides secure file preview/download functionality for external API users.
|
||||
Files can only be accessed if they belong to messages within the requesting app's context.
|
||||
"""
|
||||
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY))
|
||||
def get(self, app_model: App, end_user: EndUser, file_id: str):
|
||||
"""
|
||||
Preview/Download a file that was uploaded via Service API
|
||||
|
||||
Args:
|
||||
app_model: The authenticated app model
|
||||
end_user: The authenticated end user (optional)
|
||||
file_id: UUID of the file to preview
|
||||
|
||||
Query Parameters:
|
||||
user: Optional user identifier
|
||||
as_attachment: Boolean, whether to download as attachment (default: false)
|
||||
|
||||
Returns:
|
||||
Stream response with file content
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: File does not exist
|
||||
FileAccessDeniedError: File access denied (not owned by app)
|
||||
"""
|
||||
file_id = str(file_id)
|
||||
|
||||
# Parse query parameters
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate file ownership and get file objects
|
||||
message_file, upload_file = self._validate_file_ownership(file_id, app_model.id)
|
||||
|
||||
# Get file content generator
|
||||
try:
|
||||
generator = storage.load(upload_file.key, stream=True)
|
||||
except Exception as e:
|
||||
raise FileNotFoundError(f"Failed to load file content: {str(e)}")
|
||||
|
||||
# Build response with appropriate headers
|
||||
response = self._build_file_response(generator, upload_file, args["as_attachment"])
|
||||
|
||||
return response
|
||||
|
||||
def _validate_file_ownership(self, file_id: str, app_id: str) -> tuple[MessageFile, UploadFile]:
|
||||
"""
|
||||
Validate that the file belongs to a message within the requesting app's context
|
||||
|
||||
Security validations performed:
|
||||
1. File exists in MessageFile table (was used in a conversation)
|
||||
2. Message belongs to the requesting app
|
||||
3. UploadFile record exists and is accessible
|
||||
4. File tenant matches app tenant (additional security layer)
|
||||
|
||||
Args:
|
||||
file_id: UUID of the file to validate
|
||||
app_id: UUID of the requesting app
|
||||
|
||||
Returns:
|
||||
Tuple of (MessageFile, UploadFile) if validation passes
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: File or related records not found
|
||||
FileAccessDeniedError: File does not belong to the app's context
|
||||
"""
|
||||
try:
|
||||
# Input validation
|
||||
if not file_id or not app_id:
|
||||
raise FileAccessDeniedError("Invalid file or app identifier")
|
||||
|
||||
# First, find the MessageFile that references this upload file
|
||||
message_file = db.session.query(MessageFile).where(MessageFile.upload_file_id == file_id).first()
|
||||
|
||||
if not message_file:
|
||||
raise FileNotFoundError("File not found in message context")
|
||||
|
||||
# Get the message and verify it belongs to the requesting app
|
||||
message = (
|
||||
db.session.query(Message).where(Message.id == message_file.message_id, Message.app_id == app_id).first()
|
||||
)
|
||||
|
||||
if not message:
|
||||
raise FileAccessDeniedError("File access denied: not owned by requesting app")
|
||||
|
||||
# Get the actual upload file record
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise FileNotFoundError("Upload file record not found")
|
||||
|
||||
# Additional security: verify tenant isolation
|
||||
app = db.session.query(App).where(App.id == app_id).first()
|
||||
if app and upload_file.tenant_id != app.tenant_id:
|
||||
raise FileAccessDeniedError("File access denied: tenant mismatch")
|
||||
|
||||
return message_file, upload_file
|
||||
|
||||
except (FileNotFoundError, FileAccessDeniedError):
|
||||
# Re-raise our custom exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# Log unexpected errors for debugging
|
||||
logger.exception(
|
||||
"Unexpected error during file ownership validation",
|
||||
extra={"file_id": file_id, "app_id": app_id, "error": str(e)},
|
||||
)
|
||||
raise FileAccessDeniedError("File access validation failed")
|
||||
|
||||
def _build_file_response(self, generator, upload_file: UploadFile, as_attachment: bool = False) -> Response:
|
||||
"""
|
||||
Build Flask Response object with appropriate headers for file streaming
|
||||
|
||||
Args:
|
||||
generator: File content generator from storage
|
||||
upload_file: UploadFile database record
|
||||
as_attachment: Whether to set Content-Disposition as attachment
|
||||
|
||||
Returns:
|
||||
Flask Response object with streaming file content
|
||||
"""
|
||||
response = Response(
|
||||
generator,
|
||||
mimetype=upload_file.mime_type,
|
||||
direct_passthrough=True,
|
||||
headers={},
|
||||
)
|
||||
|
||||
# Add Content-Length if known
|
||||
if upload_file.size and upload_file.size > 0:
|
||||
response.headers["Content-Length"] = str(upload_file.size)
|
||||
|
||||
# Add Accept-Ranges header for audio/video files to support seeking
|
||||
if upload_file.mime_type in [
|
||||
"audio/mpeg",
|
||||
"audio/wav",
|
||||
"audio/mp4",
|
||||
"audio/ogg",
|
||||
"audio/flac",
|
||||
"audio/aac",
|
||||
"video/mp4",
|
||||
"video/webm",
|
||||
"video/quicktime",
|
||||
"audio/x-m4a",
|
||||
]:
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
|
||||
# Set Content-Disposition for downloads
|
||||
if as_attachment and upload_file.name:
|
||||
encoded_filename = quote(upload_file.name)
|
||||
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
|
||||
# Override content-type for downloads to force download
|
||||
response.headers["Content-Type"] = "application/octet-stream"
|
||||
|
||||
# Add caching headers for performance
|
||||
response.headers["Cache-Control"] = "public, max-age=3600" # Cache for 1 hour
|
||||
|
||||
return response
# Register the API endpoint
|
||||
api.add_resource(FilePreviewApi, "/files/<uuid:file_id>/preview")
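A hedged download sketch against the new preview route (host, key, and file id are placeholders); as_attachment=true switches Content-Disposition to attachment and forces application/octet-stream, as implemented above:

import requests

resp = requests.get(
    "https://example.com/v1/files/<file_id>/preview",
    headers={"Authorization": "Bearer <service-api-key>"},
    params={"user": "end-user-123", "as_attachment": "true"},
    stream=True,
)
with open("preview_download.bin", "wb") as f:
    for chunk in resp.iter_content(chunk_size=8192):
        f.write(chunk)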
|
||||
@@ -15,7 +15,11 @@ from fields.message_fields import agent_thought_fields, feedback_fields
|
||||
from fields.raws import FilesContainedField
|
||||
from libs.helper import TimestampField, uuid_value
|
||||
from models.model import App, AppMode, EndUser
|
||||
from services.errors.message import SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -65,7 +69,7 @@ class MessageListApi(Resource):
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -87,7 +91,7 @@ class MessageFeedbackApi(Resource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
@@ -117,7 +121,7 @@ class MessageSuggestedApi(Resource):
|
||||
questions = MessageService.get_suggested_questions_after_answer(
|
||||
app_model=app_model, user=end_user, message_id=message_id, invoke_from=InvokeFrom.SERVICE_API
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
except SuggestedQuestionsAfterAnswerDisabledError:
|
||||
raise BadRequest("Suggested Questions Is Disabled.")
|
||||
|
||||
@@ -5,7 +5,7 @@ from flask import request
|
||||
from flask_restful import Resource, fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from werkzeug.exceptions import InternalServerError
|
||||
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import (
|
||||
@@ -34,6 +34,7 @@ from libs.helper import TimestampField
|
||||
from models.model import App, AppMode, EndUser
|
||||
from repositories.factory import DifyAPIRepositoryFactory
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
|
||||
from services.errors.llm import InvokeRateLimitError
|
||||
from services.workflow_app_service import WorkflowAppService
|
||||
|
||||
@@ -120,6 +121,59 @@ class WorkflowRunApi(Resource):
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class WorkflowRunByIdApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
|
||||
def post(self, app_model: App, end_user: EndUser, workflow_id: str):
|
||||
"""
|
||||
Run specific workflow by ID
|
||||
"""
|
||||
app_mode = AppMode.value_of(app_model.mode)
|
||||
if app_mode != AppMode.WORKFLOW:
|
||||
raise NotWorkflowAppError()
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
|
||||
parser.add_argument("files", type=list, required=False, location="json")
|
||||
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Add workflow_id to args for AppGenerateService
|
||||
args["workflow_id"] = workflow_id
|
||||
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
if external_trace_id:
|
||||
args["external_trace_id"] = external_trace_id
|
||||
streaming = args.get("response_mode") == "streaming"
|
||||
|
||||
try:
|
||||
response = AppGenerateService.generate(
|
||||
app_model=app_model, user=end_user, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=streaming
|
||||
)
|
||||
|
||||
return helper.compact_generate_response(response)
|
||||
except WorkflowNotFoundError as ex:
|
||||
raise NotFound(str(ex))
|
||||
except IsDraftWorkflowError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except WorkflowIdFormatError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeRateLimitError as ex:
|
||||
raise InvokeRateLimitHttpError(ex.description)
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except ValueError as e:
|
||||
raise e
|
||||
except Exception:
|
||||
logging.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class WorkflowTaskStopApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
|
||||
def post(self, app_model: App, end_user: EndUser, task_id: str):
|
||||
@@ -193,5 +247,6 @@ class WorkflowAppLogApi(Resource):
|
||||
|
||||
api.add_resource(WorkflowRunApi, "/workflows/run")
|
||||
api.add_resource(WorkflowRunDetailApi, "/workflows/run/<string:workflow_run_id>")
|
||||
api.add_resource(WorkflowRunByIdApi, "/workflows/<string:workflow_id>/run")
|
||||
api.add_resource(WorkflowTaskStopApi, "/workflows/tasks/<string:task_id>/stop")
|
||||
api.add_resource(WorkflowAppLogApi, "/workflows/logs")
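A sketch of invoking the new run-by-id route (host, key, ids, and the inputs payload are placeholders); inputs and user are required, and omitting response_mode falls back to blocking since only the literal "streaming" enables streaming:

import requests

resp = requests.post(
    "https://example.com/v1/workflows/<workflow_id>/run",
    headers={"Authorization": "Bearer <service-api-key>"},
    json={"inputs": {"query": "hello"}, "response_mode": "blocking", "user": "end-user-123"},
)
print(resp.json())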
|
||||
|
||||
@@ -29,7 +29,7 @@ def _validate_name(name):
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 400:
|
||||
if description and len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
@@ -87,7 +87,7 @@ class DatasetListApi(DatasetApiResource):
|
||||
)
|
||||
parser.add_argument(
|
||||
"description",
|
||||
type=str,
|
||||
type=_validate_description_length,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="",
|
||||
|
||||
@@ -234,8 +234,6 @@ class DocumentAddByFileApi(DatasetApiResource):
|
||||
args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
|
||||
)
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
@@ -243,6 +241,8 @@ class DocumentAddByFileApi(DatasetApiResource):
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
@@ -358,39 +358,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
|
||||
return documents_and_batch_fields, 200
|
||||
|
||||
|
||||
class DocumentDeleteApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def delete(self, tenant_id, dataset_id, document_id):
|
||||
"""Delete document."""
|
||||
document_id = str(document_id)
|
||||
dataset_id = str(dataset_id)
|
||||
tenant_id = str(tenant_id)
|
||||
|
||||
# get dataset info
|
||||
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
|
||||
# 404 if document not found
|
||||
if document is None:
|
||||
raise NotFound("Document Not Exists.")
|
||||
|
||||
# 403 if document is archived
|
||||
if DocumentService.check_archived(document):
|
||||
raise ArchivedDocumentImmutableError()
|
||||
|
||||
try:
|
||||
# delete document
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return 204
|
||||
|
||||
|
||||
class DocumentListApi(DatasetApiResource):
|
||||
def get(self, tenant_id, dataset_id):
|
||||
dataset_id = str(dataset_id)
|
||||
@@ -473,7 +440,7 @@ class DocumentIndexingStatusApi(DatasetApiResource):
|
||||
return data
|
||||
|
||||
|
||||
class DocumentDetailApi(DatasetApiResource):
|
||||
class DocumentApi(DatasetApiResource):
|
||||
METADATA_CHOICES = {"all", "only", "without"}
|
||||
|
||||
def get(self, tenant_id, dataset_id, document_id):
|
||||
@@ -567,6 +534,37 @@ class DocumentDetailApi(DatasetApiResource):
|
||||
|
||||
return response
|
||||
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def delete(self, tenant_id, dataset_id, document_id):
|
||||
"""Delete document."""
|
||||
document_id = str(document_id)
|
||||
dataset_id = str(dataset_id)
|
||||
tenant_id = str(tenant_id)
|
||||
|
||||
# get dataset info
|
||||
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
|
||||
# 404 if document not found
|
||||
if document is None:
|
||||
raise NotFound("Document Not Exists.")
|
||||
|
||||
# 403 if document is archived
|
||||
if DocumentService.check_archived(document):
|
||||
raise ArchivedDocumentImmutableError()
|
||||
|
||||
try:
|
||||
# delete document
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return 204
|
||||
|
||||
|
||||
api.add_resource(
|
||||
DocumentAddByTextApi,
|
||||
@@ -588,7 +586,6 @@ api.add_resource(
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
|
||||
)
|
||||
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
|
||||
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
|
||||
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
|
||||
@@ -17,8 +17,8 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def post(self, tenant_id, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataArgs(**args)
|
||||
|
||||
@@ -43,7 +43,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def patch(self, tenant_id, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
@@ -101,7 +101,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataOperationData(**args)
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from flask import request
|
||||
from flask_restful import Resource, marshal_with, reqparse
|
||||
from werkzeug.exceptions import Unauthorized
|
||||
|
||||
from controllers.common import fields
|
||||
from controllers.web import api
|
||||
@@ -75,14 +76,14 @@ class AppWebAuthPermission(Resource):
|
||||
try:
|
||||
auth_header = request.headers.get("Authorization")
|
||||
if auth_header is None:
|
||||
raise
|
||||
raise Unauthorized("Authorization header is missing.")
|
||||
if " " not in auth_header:
|
||||
raise
|
||||
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
|
||||
|
||||
auth_scheme, tk = auth_header.split(None, 1)
|
||||
auth_scheme = auth_scheme.lower()
|
||||
if auth_scheme != "bearer":
|
||||
raise
|
||||
raise Unauthorized("Authorization scheme must be 'Bearer'")
|
||||
|
||||
decoded = PassportService().verify(tk)
|
||||
user_id = decoded.get("user_id", "visitor")
|
||||
|
||||
@@ -12,18 +12,17 @@ from services.file_service import FileService
|
||||
class FileApi(WebApiResource):
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model, end_user):
|
||||
file = request.files["file"]
|
||||
source = request.form.get("source")
|
||||
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
source = request.form.get("source")
|
||||
if source not in ("datasets", None):
|
||||
source = None
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ from flask_restful import fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.web import api
|
||||
from controllers.web.error import (
|
||||
AppMoreLikeThisDisabledError,
|
||||
@@ -29,7 +28,11 @@ from models.model import AppMode
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -73,9 +76,9 @@ class MessageListApi(WebApiResource):
|
||||
return MessageService.pagination_by_first_id(
|
||||
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
except ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -96,7 +99,7 @@ class MessageFeedbackApi(WebApiResource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -148,6 +148,8 @@ SupportedComparisonOperator = Literal[
|
||||
"is not",
|
||||
"empty",
|
||||
"not empty",
|
||||
"in",
|
||||
"not in",
|
||||
# for number
|
||||
"=",
|
||||
"≠",
|
||||
|
||||
@@ -118,26 +118,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
|
||||
):
|
||||
return
|
||||
|
||||
# Init conversation variables
|
||||
stmt = select(ConversationVariable).where(
|
||||
ConversationVariable.app_id == self.conversation.app_id,
|
||||
ConversationVariable.conversation_id == self.conversation.id,
|
||||
)
|
||||
with Session(db.engine) as session:
|
||||
db_conversation_variables = session.scalars(stmt).all()
|
||||
if not db_conversation_variables:
|
||||
# Create conversation variables if they don't exist.
|
||||
db_conversation_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
|
||||
)
|
||||
for variable in self._workflow.conversation_variables
|
||||
]
|
||||
session.add_all(db_conversation_variables)
|
||||
# Convert database entities to variables.
|
||||
conversation_variables = [item.to_variable() for item in db_conversation_variables]
|
||||
|
||||
session.commit()
|
||||
# Initialize conversation variables
|
||||
conversation_variables = self._initialize_conversation_variables()
|
||||
|
||||
# Create a variable pool.
|
||||
system_inputs = SystemVariable(
|
||||
@@ -292,3 +274,100 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
|
||||
message_id=message_id,
|
||||
trace_manager=app_generate_entity.trace_manager,
|
||||
)
|
||||
|
||||
def _initialize_conversation_variables(self) -> list[VariableUnion]:
|
||||
"""
|
||||
Initialize conversation variables for the current conversation.
|
||||
|
||||
This method:
|
||||
1. Loads existing variables from the database
|
||||
2. Creates new variables if none exist
|
||||
3. Syncs missing variables from the workflow definition
|
||||
|
||||
:return: List of conversation variables ready for use
|
||||
"""
|
||||
with Session(db.engine) as session:
|
||||
existing_variables = self._load_existing_conversation_variables(session)
|
||||
|
||||
if not existing_variables:
|
||||
# First time initialization - create all variables
|
||||
existing_variables = self._create_all_conversation_variables(session)
|
||||
else:
|
||||
# Check and add any missing variables from the workflow
|
||||
existing_variables = self._sync_missing_conversation_variables(session, existing_variables)
|
||||
|
||||
# Convert to Variable objects for use in the workflow
|
||||
conversation_variables = [var.to_variable() for var in existing_variables]
|
||||
|
||||
session.commit()
|
||||
return cast(list[VariableUnion], conversation_variables)
|
||||
|
||||
def _load_existing_conversation_variables(self, session: Session) -> list[ConversationVariable]:
|
||||
"""
|
||||
Load existing conversation variables from the database.
|
||||
|
||||
:param session: Database session
|
||||
:return: List of existing conversation variables
|
||||
"""
|
||||
stmt = select(ConversationVariable).where(
|
||||
ConversationVariable.app_id == self.conversation.app_id,
|
||||
ConversationVariable.conversation_id == self.conversation.id,
|
||||
)
|
||||
return list(session.scalars(stmt).all())
|
||||
|
||||
def _create_all_conversation_variables(self, session: Session) -> list[ConversationVariable]:
|
||||
"""
|
||||
Create all conversation variables for a new conversation.
|
||||
|
||||
:param session: Database session
|
||||
:return: List of created conversation variables
|
||||
"""
|
||||
new_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
|
||||
)
|
||||
for variable in self._workflow.conversation_variables
|
||||
]
|
||||
|
||||
if new_variables:
|
||||
session.add_all(new_variables)
|
||||
|
||||
return new_variables
|
||||
|
||||
def _sync_missing_conversation_variables(
|
||||
self, session: Session, existing_variables: list[ConversationVariable]
|
||||
) -> list[ConversationVariable]:
|
||||
"""
|
||||
Sync missing conversation variables from the workflow definition.
|
||||
|
||||
This handles the case where new variables are added to a workflow
|
||||
after conversations have already been created.
|
||||
|
||||
:param session: Database session
|
||||
:param existing_variables: List of existing conversation variables
|
||||
:return: Updated list including any newly created variables
|
||||
"""
|
||||
# Get IDs of existing and workflow variables
|
||||
existing_ids = {var.id for var in existing_variables}
|
||||
workflow_variables = {var.id: var for var in self._workflow.conversation_variables}
|
||||
|
||||
# Find missing variable IDs
|
||||
missing_ids = set(workflow_variables.keys()) - existing_ids
|
||||
|
||||
if not missing_ids:
|
||||
return existing_variables
|
||||
|
||||
# Create missing variables with their default values
|
||||
new_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id,
|
||||
conversation_id=self.conversation.id,
|
||||
variable=workflow_variables[var_id],
|
||||
)
|
||||
for var_id in missing_ids
|
||||
]
|
||||
|
||||
session.add_all(new_variables)
|
||||
|
||||
# Return combined list
|
||||
return existing_variables + new_variables
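A toy illustration of the set difference that drives the sync step above (variable ids and definitions are made up):

existing_ids = {"var-a"}
workflow_variables = {"var-a": "definition-a", "var-b": "definition-b"}  # id -> variable definition
missing_ids = set(workflow_variables.keys()) - existing_ids
assert missing_ids == {"var-b"}  # only var-b gets persisted with its default value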
|
||||
|
||||
@@ -9,7 +9,6 @@ from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAp
|
||||
from core.entities.provider_configuration import ProviderModelBundle
|
||||
from core.file import File, FileUploadConfig
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
|
||||
|
||||
class InvokeFrom(Enum):
|
||||
@@ -114,7 +113,8 @@ class AppGenerateEntity(BaseModel):
|
||||
extras: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
# tracing instance
|
||||
trace_manager: Optional[TraceQueueManager] = None
|
||||
# Using Any to avoid circular import with TraceQueueManager
|
||||
trace_manager: Optional[Any] = None
|
||||
|
||||
|
||||
class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
|
||||
|
||||
@@ -23,6 +23,7 @@ from core.app.entities.task_entities import (
MessageFileStreamResponse,
MessageReplaceStreamResponse,
MessageStreamResponse,
StreamEvent,
WorkflowTaskState,
)
from core.llm_generator.llm_generator import LLMGenerator

@@ -180,11 +181,15 @@ class MessageCycleManager:
:param message_id: message id
:return:
"""
message_file = db.session.query(MessageFile).filter(MessageFile.id == message_id).first()
event_type = StreamEvent.MESSAGE_FILE if message_file else StreamEvent.MESSAGE

return MessageStreamResponse(
task_id=self._application_generate_entity.task_id,
id=message_id,
answer=answer,
from_variable_selector=from_variable_selector,
event=event_type,
)

def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:
@@ -843,7 +843,7 @@ class ProviderConfiguration(BaseModel):
continue

status = ModelStatus.ACTIVE
if m.model in model_setting_map:
if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
model_setting = model_setting_map[m.model_type][m.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
@@ -176,7 +176,7 @@ class ProviderConfig(BasicProviderConfig):

scope: AppSelectorScope | ModelSelectorScope | ToolSelectorScope | None = None
required: bool = False
default: Optional[Union[int, str]] = None
default: Optional[Union[int, str, float, bool]] = None
options: Optional[list[Option]] = None
label: Optional[I18nObject] = None
help: Optional[I18nObject] = None
@@ -32,7 +32,7 @@ def get_attr(*, file: File, attr: FileAttribute):
case FileAttribute.TRANSFER_METHOD:
return file.transfer_method.value
case FileAttribute.URL:
return file.remote_url
return _to_url(file)
case FileAttribute.EXTENSION:
return file.extension
case FileAttribute.RELATED_ID:
@@ -121,9 +121,8 @@ class TokenBufferMemory:
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

if curr_message_tokens > max_token_limit:
pruned_memory = []
while curr_message_tokens > max_token_limit and len(prompt_messages) > 1:
pruned_memory.append(prompt_messages.pop(0))
prompt_messages.pop(0)
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

return prompt_messages
@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session, sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
TraceClient,
convert_datetime_to_nanoseconds,
convert_string_to_id,
convert_to_span_id,
convert_to_trace_id,
generate_span_id,

@@ -101,8 +102,9 @@ class AliyunDataTrace(BaseTraceInstance):
raise ValueError(f"Aliyun get run url failed: {str(e)}")

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or convert_to_trace_id(trace_info.workflow_run_id)
trace_id = convert_to_trace_id(trace_info.workflow_run_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
workflow_span_id = convert_to_span_id(trace_info.workflow_run_id, "workflow")
self.add_workflow_span(trace_id, workflow_span_id, trace_info)

@@ -130,6 +132,9 @@ class AliyunDataTrace(BaseTraceInstance):
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

message_span_id = convert_to_span_id(message_id, "message")
message_span = SpanData(
trace_id=trace_id,

@@ -186,9 +191,13 @@ class AliyunDataTrace(BaseTraceInstance):
return
message_id = trace_info.message_id

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

documents_data = extract_retrieval_documents(trace_info.documents)
dataset_retrieval_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name="dataset_retrieval",

@@ -214,8 +223,12 @@ class AliyunDataTrace(BaseTraceInstance):
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

tool_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name=trace_info.tool_name,

@@ -451,8 +464,13 @@ class AliyunDataTrace(BaseTraceInstance):
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

suggested_question_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=convert_to_span_id(message_id, "suggested_question"),
name="suggested_question",
@@ -181,15 +181,21 @@ def convert_to_trace_id(uuid_v4: Optional[str]) -> int:
raise ValueError(f"Invalid UUID input: {e}")


def convert_string_to_id(string: Optional[str]) -> int:
if not string:
return generate_span_id()
hash_bytes = hashlib.sha256(string.encode("utf-8")).digest()
id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return id


def convert_to_span_id(uuid_v4: Optional[str], span_type: str) -> int:
try:
uuid_obj = uuid.UUID(uuid_v4)
except Exception as e:
raise ValueError(f"Invalid UUID input: {e}")
combined_key = f"{uuid_obj.hex}-{span_type}"
hash_bytes = hashlib.sha256(combined_key.encode("utf-8")).digest()
span_id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return span_id
return convert_string_to_id(combined_key)


def convert_datetime_to_nanoseconds(start_time_a: Optional[datetime]) -> Optional[int]:
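The two helpers above share one scheme: hash a string with SHA-256 and keep the first 8 bytes as an unsigned integer, so the same UUID-plus-suffix always maps to the same span ID. A minimal standalone sketch of that scheme (illustrative only, not the `traceclient` module itself):

```python
import hashlib

def stable_64bit_id(value: str) -> int:
    """Hash a string with SHA-256 and keep the first 8 bytes as an unsigned 64-bit integer."""
    digest = hashlib.sha256(value.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], byteorder="big", signed=False)

# The same input always yields the same ID, so a span key such as
# "<workflow-run-uuid>-workflow" is reproducible across processes and retries.
print(hex(stable_64bit_id("8f14e45f-ceea-467f-a0f7-6c2e7a9f3b11-workflow")))
```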
@@ -4,6 +4,7 @@ import logging
import os
from datetime import datetime, timedelta
from typing import Any, Optional, Union, cast
from urllib.parse import urlparse

from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes
from opentelemetry import trace

@@ -40,8 +41,14 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
try:
# Choose the appropriate exporter based on config type
exporter: Union[GrpcOTLPSpanExporter, HttpOTLPSpanExporter]

# Inspect the provided endpoint to determine its structure
parsed = urlparse(arize_phoenix_config.endpoint)
base_endpoint = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path.rstrip("/")

if isinstance(arize_phoenix_config, ArizeConfig):
arize_endpoint = f"{arize_phoenix_config.endpoint}/v1"
arize_endpoint = f"{base_endpoint}/v1"
arize_headers = {
"api_key": arize_phoenix_config.api_key or "",
"space_id": arize_phoenix_config.space_id or "",

@@ -53,7 +60,7 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
timeout=30,
)
else:
phoenix_endpoint = f"{arize_phoenix_config.endpoint}/v1/traces"
phoenix_endpoint = f"{base_endpoint}{path}/v1/traces"
phoenix_headers = {
"api_key": arize_phoenix_config.api_key or "",
"authorization": f"Bearer {arize_phoenix_config.api_key or ''}",

@@ -91,16 +98,21 @@ def datetime_to_nanos(dt: Optional[datetime]) -> int:
return int(dt.timestamp() * 1_000_000_000)


def uuid_to_trace_id(string: Optional[str]) -> int:
"""Convert UUID string to a valid trace ID (16-byte integer)."""
def string_to_trace_id128(string: Optional[str]) -> int:
"""
Convert any input string into a stable 128-bit integer trace ID.

This uses SHA-256 hashing and takes the first 16 bytes (128 bits) of the digest.
It's suitable for generating consistent, unique identifiers from strings.
"""
if string is None:
string = ""
hash_object = hashlib.sha256(string.encode())

# Take the first 16 bytes (128 bits) of the hash
# Take the first 16 bytes (128 bits) of the hash digest
digest = hash_object.digest()[:16]

# Convert to integer (128 bits)
# Convert to a 128-bit integer
return int.from_bytes(digest, byteorder="big")
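For the 128-bit variant documented above, the only differences are the digest width (16 bytes) and the empty-string fallback for `None`. A hedged sketch of the same mapping:

```python
import hashlib
from typing import Optional

def trace_id_128_sketch(value: Optional[str]) -> int:
    """Illustrative: SHA-256 the input (empty string if None) and keep the first 16 bytes."""
    digest = hashlib.sha256((value or "").encode()).digest()[:16]
    return int.from_bytes(digest, byteorder="big")

assert 0 <= trace_id_128_sketch("external-run-42") < 2**128
assert trace_id_128_sketch(None) == trace_id_128_sketch("")  # None falls back to the empty string
```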
@@ -153,8 +165,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
workflow_metadata.update(trace_info.metadata)

external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or uuid_to_trace_id(trace_info.workflow_run_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.workflow_run_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -310,7 +321,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
SpanAttributes.SESSION_ID: trace_info.message_data.conversation_id,
}

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.message_id)
message_span_id = RandomIdGenerator().generate_span_id()
span_context = SpanContext(
trace_id=trace_id,

@@ -406,7 +417,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -468,7 +479,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -521,7 +532,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -568,7 +579,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
"tool_config": json.dumps(trace_info.tool_config, ensure_ascii=False),
}

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
tool_span_id = RandomIdGenerator().generate_span_id()
logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id)

@@ -629,7 +640,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -87,7 +87,7 @@ class PhoenixConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
return cls.validate_endpoint_url(v, "https://app.phoenix.arize.com")
return validate_url_with_path(v, "https://app.phoenix.arize.com")


class LangfuseConfig(BaseTracingConfig):
@@ -14,6 +14,7 @@ class BaseTraceInfo(BaseModel):
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
metadata: dict[str, Any]
trace_id: Optional[str] = None

@field_validator("inputs", "outputs")
@classmethod
@@ -67,14 +67,13 @@ class LangFuseDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.workflow_run_id
user_id = trace_info.metadata.get("user_id")
metadata = trace_info.metadata
metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id

if trace_info.message_id:
trace_id = external_trace_id or trace_info.message_id
trace_id = trace_info.trace_id or trace_info.message_id
name = TraceTaskName.MESSAGE_TRACE.value
trace_data = LangfuseTrace(
id=trace_id,

@@ -250,8 +249,10 @@ class LangFuseDataTrace(BaseTraceInstance):
user_id = end_user_data.session_id
metadata["user_id"] = user_id

trace_id = trace_info.trace_id or message_id

trace_data = LangfuseTrace(
id=message_id,
id=trace_id,
user_id=user_id,
name=TraceTaskName.MESSAGE_TRACE.value,
input={

@@ -285,7 +286,7 @@ class LangFuseDataTrace(BaseTraceInstance):

langfuse_generation_data = LangfuseGeneration(
name="llm",
trace_id=message_id,
trace_id=trace_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
model=message_data.model_id,

@@ -311,7 +312,7 @@ class LangFuseDataTrace(BaseTraceInstance):
"preset_response": trace_info.preset_response,
"inputs": trace_info.inputs,
},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.created_at,
metadata=trace_info.metadata,

@@ -334,7 +335,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
input=trace_info.inputs,
output=str(trace_info.suggested_question),
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,

@@ -352,7 +353,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
input=trace_info.inputs,
output={"documents": trace_info.documents},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.updated_at,
metadata=trace_info.metadata,

@@ -365,7 +366,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=trace_info.tool_name,
input=trace_info.tool_inputs,
output=trace_info.tool_outputs,
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,
@@ -65,8 +65,7 @@ class LangSmithDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()
message_dotted_order = (

@@ -290,7 +289,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
parent_run_id=None,
)

@@ -319,7 +318,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
id=str(uuid.uuid4()),
)

@@ -351,7 +350,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -381,7 +380,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -410,7 +409,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -440,7 +439,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error=trace_info.error or "",
)

@@ -465,7 +464,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],
@@ -96,8 +96,7 @@ class OpikDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
dify_trace_id = external_trace_id or trace_info.workflow_run_id
dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
workflow_metadata = wrap_metadata(
trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id

@@ -105,7 +104,7 @@ class OpikDataTrace(BaseTraceInstance):
root_span_id = None

if trace_info.message_id:
dify_trace_id = external_trace_id or trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)

trace_data = {

@@ -276,7 +275,7 @@ class OpikDataTrace(BaseTraceInstance):
return

metadata = trace_info.metadata
message_id = trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id

user_id = message_data.from_account_id
metadata["user_id"] = user_id

@@ -291,7 +290,7 @@ class OpikDataTrace(BaseTraceInstance):
metadata["end_user_id"] = end_user_id

trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, message_id),
"id": prepare_opik_uuid(trace_info.start_time, dify_trace_id),
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,

@@ -330,7 +329,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.MODERATION_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -356,7 +355,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -376,7 +375,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -391,7 +390,7 @@ class OpikDataTrace(BaseTraceInstance):

def tool_trace(self, trace_info: ToolTraceInfo):
span_data = {
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": trace_info.tool_name,
"type": "tool",
"start_time": trace_info.start_time,

@@ -406,7 +405,7 @@ class OpikDataTrace(BaseTraceInstance):

def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.GENERATE_NAME_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
@@ -322,7 +322,7 @@ class OpsTraceManager:
:return:
"""
# auth check
if enabled == True:
if enabled:
try:
provider_config_map[tracing_provider]
except KeyError:

@@ -422,8 +422,11 @@ class TraceTask:
self.timer = timer
self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001")
self.app_id = None

self.trace_id = None
self.kwargs = kwargs
external_trace_id = kwargs.get("external_trace_id")
if external_trace_id:
self.trace_id = external_trace_id

def execute(self):
return self.preprocess()
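The `TraceTask` change above stores an optional `external_trace_id` kwarg on `self.trace_id`, and the hunks that follow pass it into every `*TraceInfo` as `trace_id=self.trace_id`. A simplified sketch of the resulting precedence (the class and names here are illustrative, not the real `TraceTask`):

```python
from typing import Optional

class TraceTaskSketch:
    """Illustrative only: an externally supplied trace id wins over a derived one."""

    def __init__(self, workflow_run_id: str, **kwargs):
        self.workflow_run_id = workflow_run_id
        # Mirrors the hunk above: stays None unless the caller passed external_trace_id.
        self.trace_id: Optional[str] = kwargs.get("external_trace_id")

    def effective_trace_id(self) -> str:
        # Exporters fall back to an id derived from the run when trace_id is unset.
        return self.trace_id or f"derived-from-{self.workflow_run_id}"

print(TraceTaskSketch("run-1").effective_trace_id())                              # derived-from-run-1
print(TraceTaskSketch("run-1", external_trace_id="abc123").effective_trace_id())  # abc123
```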
@@ -520,11 +523,8 @@ class TraceTask:
"app_id": workflow_run.app_id,
}

external_trace_id = self.kwargs.get("external_trace_id")
if external_trace_id:
metadata["external_trace_id"] = external_trace_id

workflow_trace_info = WorkflowTraceInfo(
trace_id=self.trace_id,
workflow_data=workflow_run.to_dict(),
conversation_id=conversation_id,
workflow_id=workflow_id,

@@ -584,6 +584,7 @@ class TraceTask:
message_tokens = message_data.message_tokens

message_trace_info = MessageTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
conversation_model=conversation_mode,

@@ -627,6 +628,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None

moderation_trace_info = ModerationTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
inputs=inputs,
message_data=message_data.to_dict(),

@@ -667,6 +669,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None

suggested_question_trace_info = SuggestedQuestionTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
message_data=message_data.to_dict(),
inputs=message_data.message,

@@ -708,6 +711,7 @@ class TraceTask:
}

dataset_retrieval_trace_info = DatasetRetrievalTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
inputs=message_data.query or message_data.inputs,
documents=[doc.model_dump() for doc in documents] if documents else [],

@@ -772,6 +776,7 @@ class TraceTask:
)

tool_trace_info = ToolTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
tool_name=tool_name,

@@ -807,6 +812,7 @@ class TraceTask:
}

generate_name_trace_info = GenerateNameTraceInfo(
trace_id=self.trace_id,
conversation_id=conversation_id,
inputs=inputs,
outputs=generate_conversation_name,
@@ -87,8 +87,7 @@ class WeaveDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()

@@ -245,8 +244,12 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time
attributes["tags"] = ["message", str(trace_info.conversation_mode)]

trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id

message_run = WeaveTraceModel(
id=message_id,
id=trace_id,
op=str(TraceTaskName.MESSAGE_TRACE.value),
input_tokens=trace_info.message_tokens,
output_tokens=trace_info.answer_tokens,

@@ -274,7 +277,7 @@ class WeaveDataTrace(BaseTraceInstance):
)
self.start_call(
llm_run,
parent_run_id=message_id,
parent_run_id=trace_id,
)
self.finish_call(llm_run)
self.finish_call(message_run)

@@ -289,6 +292,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time or trace_info.message_data.created_at
attributes["end_time"] = trace_info.end_time or trace_info.message_data.updated_at

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

moderation_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.MODERATION_TRACE.value),

@@ -303,7 +309,7 @@ class WeaveDataTrace(BaseTraceInstance):
exception=getattr(trace_info, "error", None),
file_list=[],
)
self.start_call(moderation_run, parent_run_id=trace_info.message_id)
self.start_call(moderation_run, parent_run_id=trace_id)
self.finish_call(moderation_run)

def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):

@@ -316,6 +322,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or message_data.updated_at,)

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

suggested_question_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.SUGGESTED_QUESTION_TRACE.value),

@@ -326,7 +335,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)

self.start_call(suggested_question_run, parent_run_id=trace_info.message_id)
self.start_call(suggested_question_run, parent_run_id=trace_id)
self.finish_call(suggested_question_run)

def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):

@@ -338,6 +347,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or trace_info.message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or trace_info.message_data.updated_at,)

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

dataset_retrieval_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.DATASET_RETRIEVAL_TRACE.value),

@@ -348,7 +360,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)

self.start_call(dataset_retrieval_run, parent_run_id=trace_info.message_id)
self.start_call(dataset_retrieval_run, parent_run_id=trace_id)
self.finish_call(dataset_retrieval_run)

def tool_trace(self, trace_info: ToolTraceInfo):

@@ -357,6 +369,11 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time

message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id

tool_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=trace_info.tool_name,

@@ -366,9 +383,7 @@ class WeaveDataTrace(BaseTraceInstance):
attributes=attributes,
exception=trace_info.error,
)
message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
self.start_call(tool_run, parent_run_id=message_id)
self.start_call(tool_run, parent_run_id=trace_id)
self.finish_call(tool_run)

def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
@@ -208,6 +208,7 @@ class BasePluginClient:
except Exception:
raise PluginDaemonInnerError(code=rep.code, message=rep.message)

logger.error("Error in stream reponse for plugin %s", rep.__dict__)
self._handle_plugin_daemon_error(error.error_type, error.message)
raise ValueError(f"plugin daemon: {rep.message}, code: {rep.code}")
if rep.data is None:
@@ -2,6 +2,8 @@ from collections.abc import Mapping

from pydantic import TypeAdapter

from extensions.ext_logging import get_request_id


class PluginDaemonError(Exception):
"""Base class for all plugin daemon errors."""

@@ -11,7 +13,7 @@ class PluginDaemonError(Exception):

def __str__(self) -> str:
# returns the class name and description
return f"{self.__class__.__name__}: {self.description}"
return f"req_id: {get_request_id()} {self.__class__.__name__}: {self.description}"


class PluginDaemonInternalError(PluginDaemonError):
api/core/rag/datasource/vdb/clickzetta/README.md (new file, 190 lines)
@@ -0,0 +1,190 @@
# Clickzetta Vector Database Integration

This module provides integration with Clickzetta Lakehouse as a vector database for Dify.

## Features

- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
- **Vector Search**: Efficient similarity search using HNSW algorithm
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
- **Hybrid Search**: Combine vector similarity and full-text search for better results
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments

## Configuration

### Required Environment Variables

All seven configuration parameters are required:

```bash
# Authentication
CLICKZETTA_USERNAME=your_username
CLICKZETTA_PASSWORD=your_password

# Instance configuration
CLICKZETTA_INSTANCE=your_instance_id
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=your_workspace
CLICKZETTA_VCLUSTER=your_vcluster
CLICKZETTA_SCHEMA=your_schema
```

### Optional Configuration

```bash
# Batch processing
CLICKZETTA_BATCH_SIZE=100

# Full-text search configuration
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese  # Options: keyword, english, chinese, unicode
CLICKZETTA_ANALYZER_MODE=smart    # Options: max_word, smart

# Vector search configuration
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance  # Options: l2_distance, cosine_distance
```

## Usage

### 1. Set Clickzetta as the Vector Store

In your Dify configuration, set:

```bash
VECTOR_STORE=clickzetta
```

### 2. Table Structure

Clickzetta will automatically create tables with the following structure:

```sql
CREATE TABLE <collection_name> (
    id STRING NOT NULL,
    content STRING NOT NULL,
    metadata JSON,
    vector VECTOR(FLOAT, <dimension>) NOT NULL,
    PRIMARY KEY (id)
);

-- Vector index for similarity search
CREATE VECTOR INDEX idx_<collection_name>_vec
ON TABLE <schema>.<collection_name>(vector)
PROPERTIES (
    "distance.function" = "cosine_distance",
    "scalar.type" = "f32"
);

-- Inverted index for full-text search (if enabled)
CREATE INVERTED INDEX idx_<collection_name>_text
ON <schema>.<collection_name>(content)
PROPERTIES (
    "analyzer" = "chinese",
    "mode" = "smart"
);
```

## Full-Text Search Capabilities

Clickzetta supports advanced full-text search with multiple analyzers:

### Analyzer Types

1. **keyword**: No tokenization, treats the entire string as a single token
   - Best for: Exact matching, IDs, codes

2. **english**: Designed for English text
   - Features: Recognizes ASCII letters and numbers, converts to lowercase
   - Best for: English content

3. **chinese**: Chinese text tokenizer
   - Features: Recognizes Chinese and English characters, removes punctuation
   - Best for: Chinese or mixed Chinese-English content

4. **unicode**: Multi-language tokenizer based on Unicode
   - Features: Recognizes text boundaries in multiple languages
   - Best for: Multi-language content

### Analyzer Modes

- **max_word**: Fine-grained tokenization (more tokens)
- **smart**: Intelligent tokenization (balanced)
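To see how the two modes differ on a concrete phrase, you can compare `TOKENIZE` output under both settings. A hedged sketch in Python; `get_connection()` is a placeholder for however you obtain a Clickzetta DB-API connection, and only the `TOKENIZE` call shown in the Troubleshooting section below is assumed:

```python
SAMPLE_TEXT = "云器Lakehouse向量检索"

def compare_analyzer_modes(conn, text: str = SAMPLE_TEXT) -> None:
    """Print the token list produced by the chinese analyzer in max_word vs. smart mode."""
    with conn.cursor() as cur:
        for mode in ("max_word", "smart"):
            # Illustrative SQL; quoting/escaping is simplified for readability.
            cur.execute(f"SELECT TOKENIZE('{text}', map('analyzer', 'chinese', 'mode', '{mode}'))")
            print(mode, cur.fetchone()[0])

# Usage (placeholder connection helper): compare_analyzer_modes(get_connection())
```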
### Full-Text Search Functions

- `MATCH_ALL(column, query)`: All terms must be present
- `MATCH_ANY(column, query)`: At least one term must be present
- `MATCH_PHRASE(column, query)`: Exact phrase matching
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
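A hedged example of using these predicates from Python. The schema, table, and connection objects are placeholders; only the `MATCH_*` functions listed above are assumed to exist:

```python
# Illustrative query: rows containing every term of one phrase and at least one term of another.
FULL_TEXT_SQL = """
SELECT id, content
FROM my_schema.my_collection
WHERE MATCH_ALL(content, 'vector database')
  AND MATCH_ANY(content, 'dify rag retrieval')
LIMIT 10
"""

def run_full_text_query(conn):
    """Execute the sample predicate query on a DB-API style connection (placeholder)."""
    with conn.cursor() as cur:
        cur.execute(FULL_TEXT_SQL)
        return cur.fetchall()
```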
## Performance Optimization

### Vector Search

1. **Adjust exploration factor** for accuracy vs speed trade-off:
   ```sql
   SET cz.vector.index.search.ef=64;
   ```

2. **Use appropriate distance functions**:
   - `cosine_distance`: Best for normalized embeddings (e.g., from language models)
   - `l2_distance`: Best for raw feature vectors

### Full-Text Search

1. **Choose the right analyzer**:
   - Use `keyword` for exact matching
   - Use language-specific analyzers for better tokenization

2. **Combine with vector search**:
   - Pre-filter with full-text search for better performance
   - Use hybrid search for improved relevance
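One way to apply the pre-filtering advice above is to restrict candidates with a full-text predicate, cap them with a vector-distance threshold, and rank on the client. This is a sketch under stated assumptions: the vector-literal syntax, table names, and connection helper are placeholders, and it deliberately avoids assuming `ORDER BY` support on the distance expression (see Limitations):

```python
# Illustrative hybrid retrieval: full-text pre-filter plus a distance threshold.
HYBRID_SQL_TEMPLATE = """
SELECT id, content, cosine_distance(vector, {query_vector}) AS dist
FROM my_schema.my_collection
WHERE MATCH_ANY(content, '{keywords}')
  AND cosine_distance(vector, {query_vector}) < {threshold}
LIMIT 50
"""

def hybrid_search(conn, query_vector_literal: str, keywords: str, threshold: float = 0.35):
    """Run the pre-filtered query, then rank the candidates by distance on the client side."""
    sql = HYBRID_SQL_TEMPLATE.format(
        query_vector=query_vector_literal, keywords=keywords, threshold=threshold
    )
    with conn.cursor() as cur:
        cur.execute(sql)
        rows = cur.fetchall()
    return sorted(rows, key=lambda row: row[2])
```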
## Troubleshooting

### Connection Issues

1. Verify all 7 required configuration parameters are set
2. Check network connectivity to Clickzetta service
3. Ensure the user has proper permissions on the schema

### Search Performance

1. Verify vector index exists:
   ```sql
   SHOW INDEX FROM <schema>.<table_name>;
   ```

2. Check if vector index is being used:
   ```sql
   EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
   ```
   Look for `vector_index_search_type` in the execution plan.

### Full-Text Search Not Working

1. Verify inverted index is created
2. Check analyzer configuration matches your content language
3. Use `TOKENIZE()` function to test tokenization:
   ```sql
   SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
   ```

## Limitations

1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
2. Full-text search relevance scores are not provided by Clickzetta
3. Inverted index creation may fail for very large existing tables (the integration continues without raising an error)
4. Index naming constraints:
   - Index names must be unique within a schema
   - Only one vector index can be created per column
   - The implementation uses timestamps to ensure unique index names
5. A column can only have one vector index at a time

## References

- [Clickzetta Vector Search Documentation](https://yunqi.tech/documents/vector-search)
- [Clickzetta Inverted Index Documentation](https://yunqi.tech/documents/inverted-index)
- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference)
api/core/rag/datasource/vdb/clickzetta/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# Clickzetta Vector Database Integration for Dify

api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py (new file, 1086 lines)
File diff suppressed because it is too large.
@@ -7,6 +7,7 @@ from urllib.parse import urlparse
import requests
from elasticsearch import Elasticsearch
from flask import current_app
from packaging.version import parse as parse_version
from pydantic import BaseModel, model_validator

from core.rag.datasource.vdb.field import Field

@@ -22,22 +23,50 @@ logger = logging.getLogger(__name__)


class ElasticSearchConfig(BaseModel):
host: str
port: int
username: str
password: str
# Regular Elasticsearch config
host: Optional[str] = None
port: Optional[int] = None
username: Optional[str] = None
password: Optional[str] = None

# Elastic Cloud specific config
cloud_url: Optional[str] = None  # Cloud URL for Elasticsearch Cloud
api_key: Optional[str] = None

# Common config
use_cloud: bool = False
ca_certs: Optional[str] = None
verify_certs: bool = False
request_timeout: int = 100000
retry_on_timeout: bool = True
max_retries: int = 10000

@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
if not values["host"]:
raise ValueError("config HOST is required")
if not values["port"]:
raise ValueError("config PORT is required")
if not values["username"]:
raise ValueError("config USERNAME is required")
if not values["password"]:
raise ValueError("config PASSWORD is required")
use_cloud = values.get("use_cloud", False)
cloud_url = values.get("cloud_url")

if use_cloud:
# Cloud configuration validation - requires cloud_url and api_key
if not cloud_url:
raise ValueError("cloud_url is required for Elastic Cloud")

api_key = values.get("api_key")
if not api_key:
raise ValueError("api_key is required for Elastic Cloud")

else:
# Regular Elasticsearch validation
if not values.get("host"):
raise ValueError("config HOST is required for regular Elasticsearch")
if not values.get("port"):
raise ValueError("config PORT is required for regular Elasticsearch")
if not values.get("username"):
raise ValueError("config USERNAME is required for regular Elasticsearch")
if not values.get("password"):
raise ValueError("config PASSWORD is required for regular Elasticsearch")

return values


@@ -50,21 +79,69 @@ class ElasticSearchVector(BaseVector):
self._attributes = attributes

def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
"""
Initialize Elasticsearch client for both regular Elasticsearch and Elastic Cloud.
"""
try:
parsed_url = urlparse(config.host)
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
# Check if using Elastic Cloud
client_config: dict[str, Any]
if config.use_cloud and config.cloud_url:
client_config = {
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
"verify_certs": config.verify_certs,
}

# Parse cloud URL and configure hosts
parsed_url = urlparse(config.cloud_url)
host = f"{parsed_url.scheme}://{parsed_url.hostname}"
if parsed_url.port:
host += f":{parsed_url.port}"

client_config["hosts"] = [host]

# API key authentication for cloud
client_config["api_key"] = config.api_key

# SSL settings
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs

else:
hosts = f"http://{config.host}:{config.port}"
client = Elasticsearch(
hosts=hosts,
basic_auth=(config.username, config.password),
request_timeout=100000,
retry_on_timeout=True,
max_retries=10000,
)
except requests.exceptions.ConnectionError:
raise ConnectionError("Vector database connection error")
# Regular Elasticsearch configuration
parsed_url = urlparse(config.host or "")
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
use_https = parsed_url.scheme == "https"
else:
hosts = f"http://{config.host}:{config.port}"
use_https = False

client_config = {
"hosts": [hosts],
"basic_auth": (config.username, config.password),
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
}

# Only add SSL settings if using HTTPS
if use_https:
client_config["verify_certs"] = config.verify_certs
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs

client = Elasticsearch(**client_config)

# Test connection
if not client.ping():
raise ConnectionError("Failed to connect to Elasticsearch")

except requests.exceptions.ConnectionError as e:
raise ConnectionError(f"Vector database connection error: {str(e)}")
except Exception as e:
raise ConnectionError(f"Elasticsearch client initialization failed: {str(e)}")

return client

@@ -73,7 +150,7 @@ class ElasticSearchVector(BaseVector):
return cast(str, info["version"]["number"])

def _check_version(self):
if self._version < "8.0.0":
if parse_version(self._version) < parse_version("8.0.0"):
raise ValueError("Elasticsearch vector database version must be greater than 8.0.0")

def get_type(self) -> str:

@@ -209,7 +286,11 @@ class ElasticSearchVector(BaseVector):
},
}
}

self._client.indices.create(index=self._collection_name, mappings=mappings)
logger.info("Created index %s with dimension %s", self._collection_name, dim)
else:
logger.info("Collection %s already exists.", self._collection_name)

redis_client.set(collection_exist_cache_key, 1, ex=3600)

@@ -225,13 +306,51 @@ class ElasticSearchVectorFactory(AbstractVectorFactory):
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.ELASTICSEARCH, collection_name))

config = current_app.config

# Check if ELASTICSEARCH_USE_CLOUD is explicitly set to false (boolean)
use_cloud_env = config.get("ELASTICSEARCH_USE_CLOUD", False)

if use_cloud_env is False:
# Use regular Elasticsearch with config values
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "elasticsearch"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", "elastic"),
}
else:
# Check for cloud configuration
cloud_url = config.get("ELASTICSEARCH_CLOUD_URL")
if cloud_url:
config_dict = {
"use_cloud": True,
"cloud_url": cloud_url,
"api_key": config.get("ELASTICSEARCH_API_KEY"),
}
else:
# Fallback to regular Elasticsearch
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "localhost"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", ""),
}

# Common configuration
config_dict.update(
{
"ca_certs": str(config.get("ELASTICSEARCH_CA_CERTS")) if config.get("ELASTICSEARCH_CA_CERTS") else None,
"verify_certs": bool(config.get("ELASTICSEARCH_VERIFY_CERTS", False)),
"request_timeout": int(config.get("ELASTICSEARCH_REQUEST_TIMEOUT", 100000)),
"retry_on_timeout": bool(config.get("ELASTICSEARCH_RETRY_ON_TIMEOUT", True)),
"max_retries": int(config.get("ELASTICSEARCH_MAX_RETRIES", 10000)),
}
)

return ElasticSearchVector(
index_name=collection_name,
config=ElasticSearchConfig(
host=config.get("ELASTICSEARCH_HOST", "localhost"),
port=config.get("ELASTICSEARCH_PORT", 9200),
username=config.get("ELASTICSEARCH_USERNAME", ""),
password=config.get("ELASTICSEARCH_PASSWORD", ""),
),
config=ElasticSearchConfig(**config_dict),
attributes=[],
)
@@ -1,5 +1,6 @@
import json
import logging
import math
from typing import Any, Optional

import tablestore  # type: ignore

@@ -22,6 +23,7 @@ class TableStoreConfig(BaseModel):
access_key_secret: Optional[str] = None
instance_name: Optional[str] = None
endpoint: Optional[str] = None
normalize_full_text_bm25_score: Optional[bool] = False

@model_validator(mode="before")
@classmethod

@@ -47,6 +49,7 @@ class TableStoreVector(BaseVector):
config.access_key_secret,
config.instance_name,
)
self._normalize_full_text_bm25_score = config.normalize_full_text_bm25_score
self._table_name = f"{collection_name}"
self._index_name = f"{collection_name}_idx"
self._tags_field = f"{Field.METADATA_KEY.value}_tags"

@@ -131,8 +134,8 @@ class TableStoreVector(BaseVector):
filtered_list = None
if document_ids_filter:
filtered_list = ["document_id=" + item for item in document_ids_filter]

return self._search_by_full_text(query, filtered_list, top_k)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._search_by_full_text(query, filtered_list, top_k, score_threshold)

def delete(self) -> None:
self._delete_table_if_exist()

@@ -318,7 +321,19 @@ class TableStoreVector(BaseVector):
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents

def _search_by_full_text(self, query: str, document_ids_filter: list[str] | None, top_k: int) -> list[Document]:
@staticmethod
def _normalize_score_exp_decay(score: float, k: float = 0.15) -> float:
"""
Args:
score: BM25 search score.
k: decay factor, the larger the k, the steeper the low score end
"""
normalized_score = 1 - math.exp(-k * score)
return max(0.0, min(1.0, normalized_score))

def _search_by_full_text(
self, query: str, document_ids_filter: list[str] | None, top_k: int, score_threshold: float
) -> list[Document]:
bool_query = tablestore.BoolQuery(must_queries=[], filter_queries=[], should_queries=[], must_not_queries=[])
bool_query.must_queries.append(tablestore.MatchQuery(text=query, field_name=Field.CONTENT_KEY.value))
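The decay comment above is easier to read with numbers. A small standalone sketch of the same `1 - exp(-k * score)` mapping (not the `TableStoreVector` method itself):

```python
import math

def normalize_bm25(score: float, k: float = 0.15) -> float:
    """Map an unbounded BM25 score into [0, 1]; a larger k saturates low scores faster."""
    return max(0.0, min(1.0, 1 - math.exp(-k * score)))

for raw in (1, 5, 10, 20):
    print(raw, round(normalize_bm25(raw), 3))  # 0.139, 0.528, 0.777, 0.95
```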
@@ -339,15 +354,27 @@ class TableStoreVector(BaseVector):

documents = []
for search_hit in search_response.search_hits:
score = None
if self._normalize_full_text_bm25_score:
score = self._normalize_score_exp_decay(search_hit.score)

# skip when score is below threshold and use normalize score
if score and score <= score_threshold:
continue

ots_column_map = {}
for col in search_hit.row[1]:
ots_column_map[col[0]] = col[1]

vector_str = ots_column_map.get(Field.VECTOR.value)
metadata_str = ots_column_map.get(Field.METADATA_KEY.value)
vector = json.loads(vector_str) if vector_str else None
metadata = json.loads(metadata_str) if metadata_str else {}

vector_str = ots_column_map.get(Field.VECTOR.value)
vector = json.loads(vector_str) if vector_str else None

if score:
metadata["score"] = score

documents.append(
Document(
page_content=ots_column_map.get(Field.CONTENT_KEY.value) or "",

@@ -355,6 +382,8 @@ class TableStoreVector(BaseVector):
metadata=metadata,
)
)
if self._normalize_full_text_bm25_score:
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents


@@ -375,5 +404,6 @@ class TableStoreVectorFactory(AbstractVectorFactory):
instance_name=dify_config.TABLESTORE_INSTANCE_NAME,
access_key_id=dify_config.TABLESTORE_ACCESS_KEY_ID,
access_key_secret=dify_config.TABLESTORE_ACCESS_KEY_SECRET,
normalize_full_text_bm25_score=dify_config.TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE,
),
)
@@ -246,6 +246,10 @@ class TencentVector(BaseVector):
return self._get_search_res(res, score_threshold)

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
document_ids_filter = kwargs.get("document_ids_filter")
filter = None
if document_ids_filter:
filter = Filter(Filter.In("metadata.document_id", document_ids_filter))
if not self._enable_hybrid_search:
return []
res = self._client.hybrid_search(

@@ -269,6 +273,7 @@ class TencentVector(BaseVector):
),
retrieve_vector=False,
limit=kwargs.get("top_k", 4),
filter=filter,
)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._get_search_res(res, score_threshold)

@@ -172,6 +172,10 @@ class Vector:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory

return MatrixoneVectorFactory
case VectorType.CLICKZETTA:
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory

return ClickzettaVectorFactory
case _:
raise ValueError(f"Vector store {vector_type} is not supported.")


@@ -30,3 +30,4 @@ class VectorType(StrEnum):
TABLESTORE = "tablestore"
HUAWEI_CLOUD = "huawei_cloud"
MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta"
@@ -13,6 +13,8 @@ SupportedComparisonOperator = Literal[
"is not",
"empty",
"not empty",
"in",
"not in",
# for number
"=",
"≠",
@@ -1,5 +1,6 @@
import json
import logging
import operator
from typing import Any, Optional, cast

import requests

@@ -130,13 +131,15 @@ class NotionExtractor(BaseExtractor):
data[property_name] = value
row_dict = {k: v for k, v in data.items() if v}
row_content = ""
for key, value in row_dict.items():
for key, value in sorted(row_dict.items(), key=operator.itemgetter(0)):
if isinstance(value, dict):
value_dict = {k: v for k, v in value.items() if v}
value_content = "".join(f"{k}:{v} " for k, v in value_dict.items())
row_content = row_content + f"{key}:{value_content}\n"
else:
row_content = row_content + f"{key}:{value}\n"
if "url" in result:
row_content = row_content + f"Row Page URL:{result.get('url', '')}\n"
database_content.append(row_content)

has_more = response_data.get("has_more", False)
@@ -62,7 +62,7 @@ class WordExtractor(BaseExtractor):

def extract(self) -> list[Document]:
"""Load given path as single page."""
content = self.parse_docx(self.file_path, "storage")
content = self.parse_docx(self.file_path)
return [
Document(
page_content=content,

@@ -189,23 +189,8 @@ class WordExtractor(BaseExtractor):
paragraph_content.append(run.text)
return "".join(paragraph_content).strip()

def _parse_paragraph(self, paragraph, image_map):
paragraph_content = []
for run in paragraph.runs:
if run.element.xpath(".//a:blip"):
for blip in run.element.xpath(".//a:blip"):
embed_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed")
if embed_id:
rel_target = run.part.rels[embed_id].target_ref
if rel_target in image_map:
paragraph_content.append(image_map[rel_target])
if run.text.strip():
paragraph_content.append(run.text.strip())
return " ".join(paragraph_content) if paragraph_content else ""

def parse_docx(self, docx_path, image_folder):
def parse_docx(self, docx_path):
doc = DocxDocument(docx_path)
os.makedirs(image_folder, exist_ok=True)

content = []
@@ -5,14 +5,13 @@ from __future__ import annotations
from typing import Any, Optional

from core.model_manager import ModelInstance
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer import GPT2Tokenizer
from core.rag.splitter.text_splitter import (
TS,
Collection,
Literal,
RecursiveCharacterTextSplitter,
Set,
TokenTextSplitter,
Union,
)

@@ -45,14 +44,6 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):

return [len(text) for text in texts]

if issubclass(cls, TokenTextSplitter):
extra_kwargs = {
"model_name": embedding_model_instance.model if embedding_model_instance else "gpt2",
"allowed_special": allowed_special,
"disallowed_special": disallowed_special,
}
kwargs = {**kwargs, **extra_kwargs}

return cls(length_function=_character_encoder, **kwargs)
@@ -20,9 +20,6 @@ class Tool(ABC):
The base class of a tool
"""

entity: ToolEntity
runtime: ToolRuntime

def __init__(self, entity: ToolEntity, runtime: ToolRuntime) -> None:
self.entity = entity
self.runtime = runtime
@@ -37,12 +37,12 @@ class LocaltimeToTimestampTool(BuiltinTool):
@staticmethod
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
try:
if local_tz is None:
local_tz = datetime.now().astimezone().tzinfo
if isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
local_time = datetime.strptime(localtime, time_format)
localtime = local_tz.localize(local_time)  # type: ignore
if local_tz is None:
localtime = local_time.astimezone()  # type: ignore
elif isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
localtime = local_tz.localize(local_time)  # type: ignore
timestamp = int(localtime.timestamp())  # type: ignore
return timestamp
except Exception as e:
@@ -27,7 +27,7 @@ class TimezoneConversionTool(BuiltinTool):
target_time = self.timezone_convert(current_time, current_timezone, target_timezone)  # type: ignore
if not target_time:
yield self.create_text_message(
f"Invalid datatime and timezone: {current_time},{current_timezone},{target_timezone}"
f"Invalid datetime and timezone: {current_time},{current_timezone},{target_timezone}"
)
return
@@ -20,8 +20,6 @@ class BuiltinTool(Tool):
:param meta: the meta data of a tool call processing
"""

provider: str

def __init__(self, provider: str, **kwargs):
super().__init__(**kwargs)
self.provider = provider