Compare commits
190 commits between release/e-... and 1.7.2
Commit SHAs:
0baccb9e82, 2c81db5a1c, 43411d7a9e, 2dbf20a3e9, aaf9fc1562, d30f898274, 4a72fa6268, 0c5e66bccb, ff791efe18, 6083b1d618,
69c3439c3a, 7ee170f0a7, 36b221b170, d1fc98200c, bb852ef6d2, a17b7b3d89, dc65a72d93, ea502d36a9, 79a3c1618f, 0be3b4e7a6,
5f8967918e, 6900b08134, dc641348f6, 431e0105de, cbe0d9d053, f9abcfd789, 5a0a2b7e44, 41345199d8, 8362365eae, 14e1c16cf2,
b32b7712e2, 5cf55fcbab, 57c4fc6bf8, 92fcf0854b, f73ec60311, 1c60b7f070, 084dcd1a50, fd536a943a, 6f80fb72cb, cb5e2ad9b2,
62772e8871, 4b0480c8b3, c8c591d73c, 2edd32fdea, 5889059ce4, 7230497bf4, d98071a088, ac02c12e49, 11d29e8d3e, b44ecf9bf7,
e60f148824, e9045a8838, 55487ba0c6, 305ea0a2d5, a5ca76befb, e01510e2a6, 2931c891a7, ad1b1193fa, 85f33fb73d, f3c98a274b,
f6c7175828, d253ca192a, e072b7dafa, 3ff52f1809, ad61b42494, c95761f4e6, 1f15cba9a0, 3344aaabb6, b8ef0c84e6, 6b8b31ff64,
823872d294, 724ec12bf3, 8aac402b24, eb12fd9461, ad622cea9e, 2575eaf1d6, fc5ed9f316, 40a11b6942, 84543a591a, 2cd3fe0dce,
5eb061466f, 52050d3dff, 904af20023, 4934dbd0e6, d080bea20b, 607dfc8be7, 3b5130b03d, 75f722a959, b946378b38, 0cee57acca,
ab78e12089, 7fe23a0ca6, d8584dc03a, a724f35672, 60c7663a80, 8041808b53, 146d870098, 78d2f49e01, 54c8bd29ee, 406c1952b8,
0ebcee9a6b, 964fa132cb, dedd5f571c, 90373c7165, d470120a60, 0c925bd088, 76d123fe19, 20f0238aab, 6d5a7684b4, 7831d44099,
fbf844efd5, 99a4bd82b5, 58608f51da, ff9fd0cdb2, aac849d4f4, 688d07e9c3, f9b3cd1b68, b2c8718f35, 46ba0a8781, bc18d4d1b9,
a4b14fc992, be914438a5, ec488a4c43, f78b903a49, fd086b06a6, 759ded3e3a, 05b002a8b7, f7016fd922, da5c003f97, c33741a5e9,
872ff3f1d4, 8ab3fda5a8, 1821726d4f, 98aea05ad2, 79ea94483e, a0a30bfdcc, caa5928ac4, 9400832b2b, a82b55005b, a434f6240f,
f5e177db89, 5febd66808, afac1fe590, 4251515b4e, 1b2046da3f, 646900b00c, 142ab74784, ffddabde43, 8c6d87f08a, 270dd955d0,
4e2129d74f, 07cff1ed2c, 070379a900, bbdeb15501, 28478cdc41, 11ec62ca70, 4499cda186, c05c5953a8, eee576355b, a3ef869db6,
a51998e4aa, 0b44edaca9, ab163a5f75, f17ca26b10, 0ea010d7ee, 72a2c3decf, ab7c2cf000, 6914c1c85e, ea542d42ca, cba5bd588c,
00cb1c26a1, f4d4a32af2, 1bf0df03b5, ae28ca0b8d, 51a6b9dc57, 4c65a8091a, 27f400e13f, 7721648867, 47cc951841, 63b6026e6e,
84aa38586f, a70d59d4a6, 57e0a12ccd, f5e1fa4bd2, a7ce1e5789, 5f550126b3, 572a2bbe53, 537c04745d, 7f004e2f41, 7c6415551d
.env.example: 1197 changed lines (new file). File diff suppressed because it is too large.
.github/ISSUE_TEMPLATE/chore.yaml: 44 changed lines (vendored, new file)
@@ -0,0 +1,44 @@
name: "✨ Refactor"
description: Refactor existing code for improved readability and maintainability.
title: "[Chore/Refactor] "
labels:
  - refactor
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
          required: true
        - label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
          required: true
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to submit this report, otherwise it will be closed.
          required: true
        - label: 【中文用户 & Non English User】请使用英语提交,否则会被关闭 :)
          required: true
        - label: "Please do not modify this template :) and fill in all the required fields."
          required: true
  - type: textarea
    id: description
    attributes:
      label: Description
      placeholder: "Describe the refactor you are proposing."
    validations:
      required: true
  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      placeholder: "Explain why this refactor is necessary."
    validations:
      required: false
  - type: textarea
    id: additional-context
    attributes:
      label: Additional Context
      placeholder: "Add any other context or screenshots about the request here."
    validations:
      required: false
.github/workflows/api-tests.yml: 3 changed lines (vendored)
@@ -99,3 +99,6 @@ jobs:

      - name: Run Tool
        run: uv run --project api bash dev/pytest/pytest_tools.sh

      - name: Run TestContainers
        run: uv run --project api bash dev/pytest/pytest_testcontainers.sh
.github/workflows/autofix.yml: 1 changed line (vendored)
@@ -9,6 +9,7 @@ permissions:

jobs:
  autofix:
    if: github.repository == 'langgenius/dify'
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
.github/workflows/build-push.yml: 1 changed line (vendored)
@@ -7,6 +7,7 @@ on:
      - "deploy/dev"
      - "deploy/enterprise"
      - "build/**"
      - "release/e-*"
    tags:
      - "*"
@@ -1,13 +1,18 @@
name: Check i18n Files and Create PR

on:
  pull_request:
    types: [closed]
  push:
    branches: [main]
    paths:
      - 'web/i18n/en-US/*.ts'

permissions:
  contents: write
  pull-requests: write

jobs:
  check-and-update:
    if: github.event.pull_request.merged == true
    if: github.repository == 'langgenius/dify'
    runs-on: ubuntu-latest
    defaults:
      run:
@@ -15,8 +20,8 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 2 # last 2 commits
          persist-credentials: false
          fetch-depth: 2
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check for file changes in i18n/en-US
        id: check_files
@@ -27,6 +32,13 @@ jobs:
          echo "Changed files: $changed_files"
          if [ -n "$changed_files" ]; then
            echo "FILES_CHANGED=true" >> $GITHUB_ENV
            file_args=""
            for file in $changed_files; do
              filename=$(basename "$file" .ts)
              file_args="$file_args --file=$filename"
            done
            echo "FILE_ARGS=$file_args" >> $GITHUB_ENV
            echo "File arguments: $file_args"
          else
            echo "FILES_CHANGED=false" >> $GITHUB_ENV
          fi
@@ -49,14 +61,15 @@ jobs:
        if: env.FILES_CHANGED == 'true'
        run: pnpm install --frozen-lockfile

      - name: Run npm script
      - name: Generate i18n translations
        if: env.FILES_CHANGED == 'true'
        run: pnpm run auto-gen-i18n
        run: pnpm run auto-gen-i18n ${{ env.FILE_ARGS }}

      - name: Create Pull Request
        if: env.FILES_CHANGED == 'true'
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          commit-message: Update i18n files based on en-US changes
          title: 'chore: translate i18n files'
          body: This PR was automatically created to update i18n files based on changes in en-US locale.
.gitignore: 1 changed line (vendored)
@@ -215,3 +215,4 @@ mise.toml

# AI Assistant
.roo/
api/.env.backup
/clickzetta
@@ -235,13 +235,17 @@ Quickly deploy Dify to Alibaba cloud with [Alibaba Cloud Computing Nest](https:/

One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Deploy to AKS with Azure Devops Pipeline

One-Click deploy Dify to AKS with [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contributing

For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.

> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).

## Community & contact
@@ -217,13 +217,17 @@ docker compose up -d

انشر Dify على علي بابا كلاود بنقرة واحدة باستخدام [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### استخدام Azure Devops Pipeline للنشر على AKS

انشر Dify على AKS بنقرة واحدة باستخدام [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## المساهمة

لأولئك الذين يرغبون في المساهمة، انظر إلى [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) لدينا.
في الوقت نفسه، يرجى النظر في دعم Dify عن طريق مشاركته على وسائل التواصل الاجتماعي وفي الفعاليات والمؤتمرات.

> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).
> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).

**المساهمون**
@@ -235,13 +235,17 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### AKS-এ ডিপ্লয় করার জন্য Azure Devops Pipeline ব্যবহার

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) ব্যবহার করে Dify কে AKS-এ এক ক্লিকে ডিপ্লয় করুন


## Contributing

যারা কোড অবদান রাখতে চান, তাদের জন্য আমাদের [অবদান নির্দেশিকা] দেখুন (https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)।
একই সাথে, সোশ্যাল মিডিয়া এবং ইভেন্ট এবং কনফারেন্সে এটি শেয়ার করে Dify কে সমর্থন করুন।

> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।
> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।

## কমিউনিটি এবং যোগাযোগ
@@ -233,6 +233,9 @@ docker compose up -d

使用 [阿里云数据管理DMS](https://help.aliyun.com/zh/dms/dify-in-invitational-preview) 将 Dify 一键部署到 阿里云

#### 使用 Azure Devops Pipeline 部署到AKS

使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 将 Dify 一键部署到 AKS

## Star History

@@ -244,7 +247,7 @@ docker compose up -d
对于那些想要贡献代码的人,请参阅我们的[贡献指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同时,请考虑通过社交媒体、活动和会议来支持 Dify 的分享。

> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。
> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。

**Contributors**
@@ -230,13 +230,17 @@ Bereitstellung von Dify auf AWS mit [CDK](https://aws.amazon.com/cdk/)

Ein-Klick-Bereitstellung von Dify in der Alibaba Cloud mit [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Verwendung von Azure Devops Pipeline für AKS-Bereitstellung

Stellen Sie Dify mit einem Klick in AKS bereit, indem Sie [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) verwenden


## Contributing

Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Gleichzeitig bitten wir Sie, Dify zu unterstützen, indem Sie es in den sozialen Medien teilen und auf Veranstaltungen und Konferenzen präsentieren.

> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).
> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen – außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).

## Gemeinschaft & Kontakt
@@ -230,6 +230,10 @@ Despliegue Dify en AWS usando [CDK](https://aws.amazon.com/cdk/)

Despliega Dify en Alibaba Cloud con un solo clic con [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Uso de Azure Devops Pipeline para implementar en AKS

Implementa Dify en AKS con un clic usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuir

@@ -237,7 +241,7 @@ Para aquellos que deseen contribuir con código, consulten nuestra [Guía de con
Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en eventos y conferencias.


> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).

**Contribuidores**
@@ -228,6 +228,10 @@ Déployez Dify sur AWS en utilisant [CDK](https://aws.amazon.com/cdk/)

Déployez Dify en un clic sur Alibaba Cloud avec [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Utilisation d'Azure Devops Pipeline pour déployer sur AKS

Déployez Dify sur AKS en un clic en utilisant [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuer

@@ -235,7 +239,7 @@ Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribut
Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur les réseaux sociaux et lors d'événements et de conférences.


> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).
> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).

**Contributeurs**
@@ -227,6 +227,10 @@ docker compose up -d
#### Alibaba Cloud Data Management
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) を利用して、DifyをAlibaba Cloudへワンクリックでデプロイできます

#### AKSへのデプロイにAzure Devops Pipelineを使用

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)を使用してDifyをAKSにワンクリックでデプロイ


## 貢献

@@ -234,7 +238,7 @@ docker compose up -d
同時に、DifyをSNSやイベント、カンファレンスで共有してサポートしていただけると幸いです。


> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。
> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。

**貢献者**
@@ -228,6 +228,10 @@ wa'logh nIqHom neH ghun deployment toy'wI' [CDK](https://aws.amazon.com/cdk/) lo

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### AKS 'e' Deploy je Azure Devops Pipeline lo'laH

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) lo'laH Dify AKS 'e' wa'DIch click 'e' Deploy


## Contributing

@@ -235,7 +239,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.


> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).

**Contributors**
@@ -222,6 +222,10 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)를 통해 원클릭으로 Dify를 Alibaba Cloud에 배포할 수 있습니다

#### AKS에 배포하기 위해 Azure Devops Pipeline 사용

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)을 사용하여 Dify를 AKS에 원클릭으로 배포


## 기여

@@ -229,7 +233,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
동시에 Dify를 소셜 미디어와 행사 및 컨퍼런스에 공유하여 지원하는 것을 고려해 주시기 바랍니다.


> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.
> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.

**기여자**
@@ -227,13 +227,17 @@ Implante o Dify na AWS usando [CDK](https://aws.amazon.com/cdk/)

Implante o Dify na Alibaba Cloud com um clique usando o [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Usando Azure Devops Pipeline para Implantar no AKS

Implante o Dify no AKS com um clique usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Contribuindo

Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências.

> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).

**Contribuidores**
@@ -228,6 +228,10 @@ Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/)

Z enim klikom namestite Dify na Alibaba Cloud z [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Uporaba Azure Devops Pipeline za uvajanje v AKS

Z enim klikom namestite Dify v AKS z uporabo [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Prispevam
@@ -221,13 +221,17 @@ Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.ter

[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) kullanarak Dify'ı tek tıkla Alibaba Cloud'a dağıtın

#### AKS'ye Dağıtım için Azure Devops Pipeline Kullanımı

[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) kullanarak Dify'ı tek tıkla AKS'ye dağıtın


## Katkıda Bulunma

Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz.
Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda paylaşarak desteklemeyi düşünün.

> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.
> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.

**Katkıda Bulunanlar**
@@ -233,13 +233,17 @@ Dify 的所有功能都提供相應的 API,因此您可以輕鬆地將 Dify

透過 [阿里雲數據管理DMS](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/),一鍵將 Dify 部署至阿里雲

#### 使用 Azure Devops Pipeline 部署到AKS

使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 將 Dify 一鍵部署到 AKS


## 貢獻

對於想要貢獻程式碼的開發者,請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同時,也請考慮透過在社群媒體和各種活動與會議上分享 Dify 來支持我們。

> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。
> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。

## 社群與聯絡方式
@@ -224,6 +224,10 @@ Triển khai Dify trên AWS bằng [CDK](https://aws.amazon.com/cdk/)

Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)

#### Sử dụng Azure Devops Pipeline để Triển khai lên AKS

Triển khai Dify lên AKS chỉ với một cú nhấp chuột bằng [Azure Devops Pipeline Helm Chart bởi @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)


## Đóng góp

@@ -231,7 +235,7 @@ Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng
Đồng thời, vui lòng xem xét hỗ trợ Dify bằng cách chia sẻ nó trên mạng xã hội và tại các sự kiện và hội nghị.


> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.
> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.

**Người đóng góp**
@@ -232,6 +232,7 @@ TABLESTORE_ENDPOINT=https://instance-name.cn-hangzhou.ots.aliyuncs.com
TABLESTORE_INSTANCE_NAME=instance-name
TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false

# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
@@ -19,7 +19,7 @@ RUN apt-get update \

# Install Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync --locked
RUN uv sync --locked --no-dev

# production stage
FROM base AS production
@@ -5,10 +5,11 @@ import secrets
from typing import Any, Optional

import click
import sqlalchemy as sa
from flask import current_app
from pydantic import TypeAdapter
from sqlalchemy import select
from werkzeug.exceptions import NotFound
from sqlalchemy.exc import SQLAlchemyError

from configs import dify_config
from constants.languages import languages

@@ -180,8 +181,8 @@ def migrate_annotation_vector_database():
)
if not apps:
break
except NotFound:
break
except SQLAlchemyError:
raise

page += 1
for app in apps:

@@ -307,8 +308,8 @@ def migrate_knowledge_vector_database():
)

datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise

page += 1
for dataset in datasets:

@@ -457,7 +458,7 @@ def convert_to_agent_apps():
"""

with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query))
rs = conn.execute(sa.text(sql_query))

apps = []
for i in rs:

@@ -560,8 +561,8 @@ def old_metadata_migration():
.order_by(DatasetDocument.created_at.desc())
)
documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise
if not documents:
break
for document in documents:

@@ -702,7 +703,7 @@ def fix_app_site_missing():
sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id
where sites.id is null limit 1000"""
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql))
rs = conn.execute(sa.text(sql))

processed_count = 0
for i in rs:

@@ -916,7 +917,7 @@ def clear_orphaned_file_records(force: bool):
)
orphaned_message_files = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])})

@@ -937,7 +938,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
query = "DELETE FROM message_files WHERE id IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
conn.execute(sa.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
click.echo(
click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
)

@@ -954,7 +955,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]})
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))

@@ -974,7 +975,7 @@ def clear_orphaned_file_records(force: bool):
f"SELECT {ids_table['column']} FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])})
elif ids_table["type"] == "text":

@@ -989,7 +990,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})

@@ -1008,7 +1009,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})

@@ -1037,7 +1038,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white"))
query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple(orphaned_files)})
conn.execute(sa.text(query), {"ids": tuple(orphaned_files)})
except Exception as e:
click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red"))
return

@@ -1107,7 +1108,7 @@ def remove_orphaned_files_on_storage(force: bool):
click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append(str(i[0]))
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
@@ -330,17 +330,17 @@ class HttpConfig(BaseSettings):
def WEB_API_CORS_ALLOW_ORIGINS(self) -> list[str]:
return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",")

HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum connection timeout in seconds for HTTP requests")
] = 10
HTTP_REQUEST_MAX_CONNECT_TIMEOUT: int = Field(
ge=1, description="Maximum connection timeout in seconds for HTTP requests", default=10
)

HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[
PositiveInt, Field(ge=60, description="Maximum read timeout in seconds for HTTP requests")
] = 60
HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
ge=1, description="Maximum read timeout in seconds for HTTP requests", default=60
)

HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum write timeout in seconds for HTTP requests")
] = 20
HTTP_REQUEST_MAX_WRITE_TIMEOUT: int = Field(
ge=1, description="Maximum write timeout in seconds for HTTP requests", default=20
)

HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
description="Maximum allowed size in bytes for binary data in HTTP requests",
@@ -10,6 +10,7 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig

@@ -20,6 +21,7 @@ from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig

@@ -52,6 +54,7 @@ class StorageConfig(BaseSettings):
"aliyun-oss",
"azure-blob",
"baidu-obs",
"clickzetta-volume",
"google-storage",
"huawei-obs",
"oci-storage",

@@ -61,8 +64,9 @@
"local",
] = Field(
description="Type of storage to use."
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)

@@ -140,7 +144,8 @@ class DatabaseConfig(BaseSettings):
default="postgresql",
)

@computed_field
@computed_field # type: ignore[misc]
@property
def SQLALCHEMY_DATABASE_URI(self) -> str:
db_extras = (
f"{self.DB_EXTRAS}&client_encoding={self.DB_CHARSET}" if self.DB_CHARSET else self.DB_EXTRAS

@@ -215,7 +220,7 @@ class DatabaseConfig(BaseSettings):

class CeleryConfig(DatabaseConfig):
CELERY_BACKEND: str = Field(
description="Backend for Celery task results. Options: 'database', 'redis'.",
description="Backend for Celery task results. Options: 'database', 'redis', 'rabbitmq'.",
default="redis",
)

@@ -245,7 +250,12 @@ class CeleryConfig(DatabaseConfig):

@computed_field
def CELERY_RESULT_BACKEND(self) -> str | None:
return f"db+{self.SQLALCHEMY_DATABASE_URI}" if self.CELERY_BACKEND == "database" else self.CELERY_BROKER_URL
if self.CELERY_BACKEND in ("database", "rabbitmq"):
return f"db+{self.SQLALCHEMY_DATABASE_URI}"
elif self.CELERY_BACKEND == "redis":
return self.CELERY_BROKER_URL
else:
return None

@property
def BROKER_USE_SSL(self) -> bool:

@@ -298,6 +308,7 @@ class MiddlewareConfig(
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
ClickZettaVolumeStorageConfig,
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,

@@ -310,6 +321,7 @@ class MiddlewareConfig(
VectorStoreConfig,
AnalyticdbConfig,
ChromaConfig,
ClickzettaConfig,
HuaweiCloudConfig,
MilvusConfig,
MyScaleConfig,
@@ -0,0 +1,65 @@
"""ClickZetta Volume Storage Configuration"""

from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings


class ClickZettaVolumeStorageConfig(BaseSettings):
    """Configuration for ClickZetta Volume storage."""

    CLICKZETTA_VOLUME_USERNAME: Optional[str] = Field(
        description="Username for ClickZetta Volume authentication",
        default=None,
    )

    CLICKZETTA_VOLUME_PASSWORD: Optional[str] = Field(
        description="Password for ClickZetta Volume authentication",
        default=None,
    )

    CLICKZETTA_VOLUME_INSTANCE: Optional[str] = Field(
        description="ClickZetta instance identifier",
        default=None,
    )

    CLICKZETTA_VOLUME_SERVICE: str = Field(
        description="ClickZetta service endpoint",
        default="api.clickzetta.com",
    )

    CLICKZETTA_VOLUME_WORKSPACE: str = Field(
        description="ClickZetta workspace name",
        default="quick_start",
    )

    CLICKZETTA_VOLUME_VCLUSTER: str = Field(
        description="ClickZetta virtual cluster name",
        default="default_ap",
    )

    CLICKZETTA_VOLUME_SCHEMA: str = Field(
        description="ClickZetta schema name",
        default="dify",
    )

    CLICKZETTA_VOLUME_TYPE: str = Field(
        description="ClickZetta volume type (table|user|external)",
        default="user",
    )

    CLICKZETTA_VOLUME_NAME: Optional[str] = Field(
        description="ClickZetta volume name for external volumes",
        default=None,
    )

    CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
        description="Prefix for ClickZetta volume table names",
        default="dataset_",
    )

    CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
        description="Directory prefix for User Volume to organize Dify files",
        default="dify_km",
    )
api/configs/middleware/vdb/clickzetta_config.py: 69 changed lines (new file)
@@ -0,0 +1,69 @@
from typing import Optional

from pydantic import BaseModel, Field


class ClickzettaConfig(BaseModel):
    """
    Clickzetta Lakehouse vector database configuration
    """

    CLICKZETTA_USERNAME: Optional[str] = Field(
        description="Username for authenticating with Clickzetta Lakehouse",
        default=None,
    )

    CLICKZETTA_PASSWORD: Optional[str] = Field(
        description="Password for authenticating with Clickzetta Lakehouse",
        default=None,
    )

    CLICKZETTA_INSTANCE: Optional[str] = Field(
        description="Clickzetta Lakehouse instance ID",
        default=None,
    )

    CLICKZETTA_SERVICE: Optional[str] = Field(
        description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
        default="api.clickzetta.com",
    )

    CLICKZETTA_WORKSPACE: Optional[str] = Field(
        description="Clickzetta workspace name",
        default="default",
    )

    CLICKZETTA_VCLUSTER: Optional[str] = Field(
        description="Clickzetta virtual cluster name",
        default="default_ap",
    )

    CLICKZETTA_SCHEMA: Optional[str] = Field(
        description="Database schema name in Clickzetta",
        default="public",
    )

    CLICKZETTA_BATCH_SIZE: Optional[int] = Field(
        description="Batch size for bulk insert operations",
        default=100,
    )

    CLICKZETTA_ENABLE_INVERTED_INDEX: Optional[bool] = Field(
        description="Enable inverted index for full-text search capabilities",
        default=True,
    )

    CLICKZETTA_ANALYZER_TYPE: Optional[str] = Field(
        description="Analyzer type for full-text search: keyword, english, chinese, unicode",
        default="chinese",
    )

    CLICKZETTA_ANALYZER_MODE: Optional[str] = Field(
        description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
        default="smart",
    )

    CLICKZETTA_VECTOR_DISTANCE_FUNCTION: Optional[str] = Field(
        description="Distance function for vector similarity: l2_distance or cosine_distance",
        default="cosine_distance",
    )
@@ -1,12 +1,13 @@
from typing import Optional

from pydantic import Field, PositiveInt
from pydantic import Field, PositiveInt, model_validator
from pydantic_settings import BaseSettings


class ElasticsearchConfig(BaseSettings):
    """
    Configuration settings for Elasticsearch
    Configuration settings for both self-managed and Elastic Cloud deployments.
    Can load from environment variables or .env files.
    """

    ELASTICSEARCH_HOST: Optional[str] = Field(
@@ -28,3 +29,50 @@ class ElasticsearchConfig(BaseSettings):
        description="Password for authenticating with Elasticsearch (default is 'elastic')",
        default="elastic",
    )

    # Elastic Cloud (optional)
    ELASTICSEARCH_USE_CLOUD: Optional[bool] = Field(
        description="Set to True to use Elastic Cloud instead of self-hosted Elasticsearch", default=False
    )
    ELASTICSEARCH_CLOUD_URL: Optional[str] = Field(
        description="Full URL for Elastic Cloud deployment (e.g., 'https://example.es.region.aws.found.io:443')",
        default=None,
    )
    ELASTICSEARCH_API_KEY: Optional[str] = Field(
        description="API key for authenticating with Elastic Cloud", default=None
    )

    # Common options
    ELASTICSEARCH_CA_CERTS: Optional[str] = Field(
        description="Path to CA certificate file for SSL verification", default=None
    )
    ELASTICSEARCH_VERIFY_CERTS: bool = Field(
        description="Whether to verify SSL certificates (default is False)", default=False
    )
    ELASTICSEARCH_REQUEST_TIMEOUT: int = Field(
        description="Request timeout in milliseconds (default is 100000)", default=100000
    )
    ELASTICSEARCH_RETRY_ON_TIMEOUT: bool = Field(
        description="Whether to retry requests on timeout (default is True)", default=True
    )
    ELASTICSEARCH_MAX_RETRIES: int = Field(
        description="Maximum number of retry attempts (default is 10000)", default=10000
    )

    @model_validator(mode="after")
    def validate_elasticsearch_config(self):
        """Validate Elasticsearch configuration based on deployment type."""
        if self.ELASTICSEARCH_USE_CLOUD:
            if not self.ELASTICSEARCH_CLOUD_URL:
                raise ValueError("ELASTICSEARCH_CLOUD_URL is required when using Elastic Cloud")
            if not self.ELASTICSEARCH_API_KEY:
                raise ValueError("ELASTICSEARCH_API_KEY is required when using Elastic Cloud")
        else:
            if not self.ELASTICSEARCH_HOST:
                raise ValueError("ELASTICSEARCH_HOST is required for self-hosted Elasticsearch")
            if not self.ELASTICSEARCH_USERNAME:
                raise ValueError("ELASTICSEARCH_USERNAME is required for self-hosted Elasticsearch")
            if not self.ELASTICSEARCH_PASSWORD:
                raise ValueError("ELASTICSEARCH_PASSWORD is required for self-hosted Elasticsearch")

        return self
@@ -28,3 +28,8 @@ class TableStoreConfig(BaseSettings):
        description="AccessKey secret for the instance name",
        default=None,
    )

    TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: bool = Field(
        description="Whether to normalize full-text search scores to [0, 1]",
        default=False,
    )
@@ -9,10 +9,10 @@ DEFAULT_FILE_NUMBER_LIMITS = 3
IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "webp", "gif", "svg"]
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])

VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "mpga"]
VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "webm"]
VIDEO_EXTENSIONS.extend([ext.upper() for ext in VIDEO_EXTENSIONS])

AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "webm", "amr"]
AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "amr", "mpga"]
AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])
@@ -84,6 +84,7 @@ from .datasets import (
    external,
    hit_testing,
    metadata,
    upload_file,
    website,
)
@@ -100,7 +100,7 @@ class AnnotationReplyActionStatusApi(Resource):
return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200


class AnnotationListApi(Resource):
class AnnotationApi(Resource):
@setup_required
@login_required
@account_initialization_required

@@ -123,6 +123,23 @@ class AnnotationListApi(Resource):
}
return response, 200

@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()

app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation

@setup_required
@login_required
@account_initialization_required

@@ -131,8 +148,25 @@ class AnnotationListApi(Resource):
raise Forbidden()

app_id = str(app_id)
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204

# Use request.args.getlist to get annotation_ids array directly
annotation_ids = request.args.getlist("annotation_id")

# If annotation_ids are provided, handle batch deletion
if annotation_ids:
# Check if any annotation_ids contain empty strings or invalid values
if not all(annotation_id.strip() for annotation_id in annotation_ids if annotation_id):
return {
"code": "bad_request",
"message": "annotation_ids are required if the parameter is provided.",
}, 400

result = AppAnnotationService.delete_app_annotations_in_batch(app_id, annotation_ids)
return result, 204
# If no annotation_ids are provided, handle clearing all annotations
else:
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204


class AnnotationExportApi(Resource):

@@ -149,25 +183,6 @@ class AnnotationExportApi(Resource):
return response, 200


class AnnotationCreateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()

app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation


class AnnotationUpdateDeleteApi(Resource):
@setup_required
@login_required

@@ -210,14 +225,15 @@ class AnnotationBatchImportApi(Resource):
raise Forbidden()

app_id = str(app_id)
# get file from request
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()

if len(request.files) > 1:
raise TooManyFilesError()

# get file from request
file = request.files["file"]
# check file type
if not file.filename or not file.filename.lower().endswith(".csv"):
raise ValueError("Invalid file type. Only CSV files are allowed")

@@ -276,7 +292,7 @@ api.add_resource(AnnotationReplyActionApi, "/apps/<uuid:app_id>/annotation-reply
api.add_resource(
AnnotationReplyActionStatusApi, "/apps/<uuid:app_id>/annotation-reply/<string:action>/status/<uuid:job_id>"
)
api.add_resource(AnnotationListApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationExportApi, "/apps/<uuid:app_id>/annotations/export")
api.add_resource(AnnotationUpdateDeleteApi, "/apps/<uuid:app_id>/annotations/<uuid:annotation_id>")
api.add_resource(AnnotationBatchImportApi, "/apps/<uuid:app_id>/annotations/batch-import")
@@ -28,6 +28,12 @@ from services.feature_service import FeatureService
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]


def _validate_description_length(description):
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description


class AppListApi(Resource):
@setup_required
@login_required

@@ -94,7 +100,7 @@ class AppListApi(Resource):
"""Create app"""
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("mode", type=str, choices=ALLOW_CREATE_APP_MODES, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")

@@ -146,7 +152,7 @@ class AppApi(Resource):

parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")

@@ -189,7 +195,7 @@ class AppCopyApi(Resource):

parser = reqparse.RequestParser()
parser.add_argument("name", type=str, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")
@@ -5,7 +5,6 @@ from flask_restful import Resource, fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console import api
|
||||
from controllers.console.app.error import (
|
||||
CompletionRequestError,
|
||||
@@ -133,7 +132,7 @@ class MessageFeedbackApi(Resource):
|
||||
rating=args.get("rating"),
|
||||
content=None,
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -67,7 +67,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "message_count": i.message_count})
|
||||
|
||||
@@ -176,7 +176,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
|
||||
|
||||
@@ -234,7 +234,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "token_count": i.token_count, "total_price": i.total_price, "currency": "USD"}
|
||||
@@ -310,7 +310,7 @@ ORDER BY
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
|
||||
@@ -373,7 +373,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{
|
||||
@@ -435,7 +435,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "latency": round(i.latency * 1000, 4)})
|
||||
|
||||
@@ -495,7 +495,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "tps": round(i.tokens_per_second, 4)})
|
||||
|
||||
|
||||
@@ -2,6 +2,7 @@ from datetime import datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import pytz
|
||||
import sqlalchemy as sa
|
||||
from flask import jsonify
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource, reqparse
|
||||
@@ -71,7 +72,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "runs": i.runs})
|
||||
|
||||
@@ -133,7 +134,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
|
||||
|
||||
@@ -195,7 +196,7 @@ WHERE
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{
|
||||
@@ -277,7 +278,7 @@ GROUP BY
|
||||
response_data = []
|
||||
|
||||
with db.engine.begin() as conn:
|
||||
rs = conn.execute(db.text(sql_query), arg_dict)
|
||||
rs = conn.execute(sa.text(sql_query), arg_dict)
|
||||
for i in rs:
|
||||
response_data.append(
|
||||
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
|
||||
|
||||
@@ -113,9 +113,3 @@ class MemberNotInTenantError(BaseHTTPException):
|
||||
error_code = "member_not_in_tenant"
|
||||
description = "The member is not in the workspace."
|
||||
code = 400
class AccountInFreezeError(BaseHTTPException):
|
||||
error_code = "account_in_freeze"
|
||||
description = "This email is temporarily unavailable."
|
||||
code = 400
|
||||
|
||||
@@ -41,7 +41,7 @@ def _validate_name(name):
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 400:
|
||||
if description and len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
@@ -113,7 +113,7 @@ class DatasetListApi(Resource):
|
||||
)
|
||||
parser.add_argument(
|
||||
"description",
|
||||
type=str,
|
||||
type=_validate_description_length,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="",
|
||||
@@ -683,6 +683,7 @@ class DatasetRetrievalSettingApi(Resource):
|
||||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.TENCENT
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
@@ -731,6 +732,7 @@ class DatasetRetrievalSettingMockApi(Resource):
|
||||
| VectorType.TENCENT
|
||||
| VectorType.HUAWEI_CLOUD
|
||||
| VectorType.MATRIXONE
|
||||
| VectorType.CLICKZETTA
|
||||
):
|
||||
return {
|
||||
"retrieval_method": [
|
||||
|
||||
@@ -642,7 +642,7 @@ class DocumentIndexingStatusApi(DocumentResource):
|
||||
return marshal(document_dict, document_status_fields)
|
||||
|
||||
|
||||
class DocumentDetailApi(DocumentResource):
|
||||
class DocumentApi(DocumentResource):
|
||||
METADATA_CHOICES = {"all", "only", "without"}
|
||||
|
||||
@setup_required
|
||||
@@ -730,6 +730,28 @@ class DocumentDetailApi(DocumentResource):
|
||||
|
||||
return response, 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
def delete(self, dataset_id, document_id):
|
||||
dataset_id = str(dataset_id)
|
||||
document_id = str(document_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check user's model setting
|
||||
DatasetService.check_dataset_model_setting(dataset)
|
||||
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
try:
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return {"result": "success"}, 204
|
||||
|
||||
|
||||
class DocumentProcessingApi(DocumentResource):
|
||||
@setup_required
|
||||
@@ -768,30 +790,6 @@ class DocumentProcessingApi(DocumentResource):
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
class DocumentDeleteApi(DocumentResource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
def delete(self, dataset_id, document_id):
|
||||
dataset_id = str(dataset_id)
|
||||
document_id = str(document_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id)
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check user's model setting
|
||||
DatasetService.check_dataset_model_setting(dataset)
|
||||
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
try:
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return {"result": "success"}, 204
|
||||
|
||||
|
||||
class DocumentMetadataApi(DocumentResource):
|
||||
@setup_required
|
||||
@login_required
|
||||
@@ -1037,11 +1035,10 @@ api.add_resource(
|
||||
api.add_resource(DocumentBatchIndexingEstimateApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-estimate")
|
||||
api.add_resource(DocumentBatchIndexingStatusApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
|
||||
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
|
||||
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(
|
||||
DocumentProcessingApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>"
|
||||
)
|
||||
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentMetadataApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata")
|
||||
api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>/batch")
|
||||
api.add_resource(DocumentPauseApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause")
|
||||
|
||||
@@ -22,8 +22,8 @@ class DatasetMetadataCreateApi(Resource):
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def post(self, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataArgs(**args)
|
||||
|
||||
@@ -56,7 +56,7 @@ class DatasetMetadataApi(Resource):
|
||||
@marshal_with(dataset_metadata_fields)
|
||||
def patch(self, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
@@ -127,7 +127,7 @@ class DocumentMetadataEditApi(Resource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataOperationData(**args)
|
||||
|
||||
|
||||
api/controllers/console/datasets/upload_file.py (new file, 62 lines)
@@ -0,0 +1,62 @@
|
||||
from flask_login import current_user
|
||||
from flask_restful import Resource
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from controllers.console import api
|
||||
from controllers.console.wraps import (
|
||||
account_initialization_required,
|
||||
setup_required,
|
||||
)
|
||||
from core.file import helpers as file_helpers
|
||||
from extensions.ext_database import db
|
||||
from models.dataset import Dataset
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService
|
||||
|
||||
|
||||
class UploadFileApi(Resource):
|
||||
@setup_required
|
||||
@account_initialization_required
|
||||
def get(self, dataset_id, document_id):
|
||||
"""Get upload file."""
|
||||
# check dataset
|
||||
dataset_id = str(dataset_id)
|
||||
dataset = (
|
||||
db.session.query(Dataset)
|
||||
.filter(Dataset.tenant_id == current_user.current_tenant_id, Dataset.id == dataset_id)
|
||||
.first()
|
||||
)
|
||||
if not dataset:
|
||||
raise NotFound("Dataset not found.")
|
||||
# check document
|
||||
document_id = str(document_id)
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
if not document:
|
||||
raise NotFound("Document not found.")
|
||||
# check upload file
|
||||
if document.data_source_type != "upload_file":
|
||||
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
|
||||
data_source_info = document.data_source_info_dict
|
||||
if data_source_info and "upload_file_id" in data_source_info:
|
||||
file_id = data_source_info["upload_file_id"]
|
||||
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
|
||||
if not upload_file:
|
||||
raise NotFound("UploadFile not found.")
|
||||
else:
|
||||
raise ValueError("Upload file id not found in document data source info.")
|
||||
|
||||
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
|
||||
return {
|
||||
"id": upload_file.id,
|
||||
"name": upload_file.name,
|
||||
"size": upload_file.size,
|
||||
"extension": upload_file.extension,
|
||||
"url": url,
|
||||
"download_url": f"{url}&as_attachment=true",
|
||||
"mime_type": upload_file.mime_type,
|
||||
"created_by": upload_file.created_by,
|
||||
"created_at": upload_file.created_at.timestamp(),
|
||||
}, 200
api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")
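A hedged client-side sketch of calling the new route; the /console/api prefix, host, and bearer token are assumptions about the deployment, not part of this change:

import requests

resp = requests.get(
    "https://example.com/console/api/datasets/<dataset_id>/documents/<document_id>/upload-file",
    headers={"Authorization": "Bearer <console-access-token>"},
)
info = resp.json()
print(info["name"], info["size"], info["download_url"])  # download_url appends &as_attachment=true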
|
||||
@@ -127,7 +127,7 @@ class EducationActivateLimitError(BaseHTTPException):
|
||||
code = 429
|
||||
|
||||
|
||||
class CompilanceRateLimitError(BaseHTTPException):
|
||||
error_code = "compilance_rate_limit"
|
||||
class ComplianceRateLimitError(BaseHTTPException):
|
||||
error_code = "compliance_rate_limit"
|
||||
description = "Rate limit exceeded for downloading compliance report."
|
||||
code = 429
|
||||
|
||||
@@ -58,21 +58,38 @@ class InstalledAppsListApi(Resource):
|
||||
# filter out apps that user doesn't have access to
|
||||
if FeatureService.get_system_features().webapp_auth.enabled:
|
||||
user_id = current_user.id
|
||||
res = []
|
||||
app_ids = [installed_app["app"].id for installed_app in installed_app_list]
|
||||
webapp_settings = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids)
|
||||
|
||||
# Pre-filter out apps without setting or with sso_verified
|
||||
filtered_installed_apps = []
|
||||
app_id_to_app_code = {}
|
||||
|
||||
for installed_app in installed_app_list:
|
||||
webapp_setting = webapp_settings.get(installed_app["app"].id)
|
||||
if not webapp_setting:
|
||||
app_id = installed_app["app"].id
|
||||
webapp_setting = webapp_settings.get(app_id)
|
||||
if not webapp_setting or webapp_setting.access_mode == "sso_verified":
|
||||
continue
|
||||
if webapp_setting.access_mode == "sso_verified":
|
||||
continue
|
||||
app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
|
||||
if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
|
||||
user_id=user_id,
|
||||
app_code=app_code,
|
||||
):
|
||||
app_code = AppService.get_app_code_by_id(str(app_id))
|
||||
app_id_to_app_code[app_id] = app_code
|
||||
filtered_installed_apps.append(installed_app)
|
||||
|
||||
app_codes = list(app_id_to_app_code.values())
|
||||
|
||||
# Batch permission check
|
||||
permissions = EnterpriseService.WebAppAuth.batch_is_user_allowed_to_access_webapps(
|
||||
user_id=user_id,
|
||||
app_codes=app_codes,
|
||||
)
|
||||
|
||||
# Keep only allowed apps
|
||||
res = []
|
||||
for installed_app in filtered_installed_apps:
|
||||
app_id = installed_app["app"].id
|
||||
app_code = app_id_to_app_code[app_id]
|
||||
if permissions.get(app_code):
|
||||
res.append(installed_app)
|
||||
|
||||
installed_app_list = res
|
||||
logger.debug("installed_app_list: %s, user_id: %s", installed_app_list, user_id)
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ from flask_restful import marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.console.app.error import (
|
||||
AppMoreLikeThisDisabledError,
|
||||
CompletionRequestError,
|
||||
@@ -29,7 +28,11 @@ from models.model import AppMode
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -52,9 +55,9 @@ class MessageListApi(InstalledAppResource):
|
||||
return MessageService.pagination_by_first_id(
|
||||
app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
except ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -77,7 +80,7 @@ class MessageFeedbackApi(InstalledAppResource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -49,7 +49,6 @@ class FileApi(Resource):
|
||||
@marshal_with(file_fields)
|
||||
@cloud_edition_billing_resource_check("documents")
|
||||
def post(self):
|
||||
file = request.files["file"]
|
||||
source_str = request.form.get("source")
|
||||
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None
|
||||
|
||||
@@ -58,6 +57,7 @@ class FileApi(Resource):
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
file = request.files["file"]
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
@@ -9,14 +9,13 @@ from configs import dify_config
|
||||
from constants.languages import supported_language
|
||||
from controllers.console import api
|
||||
from controllers.console.auth.error import (
|
||||
AccountInFreezeError,
|
||||
EmailAlreadyInUseError,
|
||||
EmailChangeLimitError,
|
||||
EmailCodeError,
|
||||
InvalidEmailError,
|
||||
InvalidTokenError,
|
||||
)
|
||||
from controllers.console.error import AccountNotFound, EmailSendIpLimitError
|
||||
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
|
||||
from controllers.console.workspace.error import (
|
||||
AccountAlreadyInitedError,
|
||||
CurrentPasswordIncorrectError,
|
||||
|
||||
@@ -191,9 +191,6 @@ class WebappLogoWorkspaceApi(Resource):
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("workspace_custom")
|
||||
def post(self):
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
@@ -201,6 +198,8 @@ class WebappLogoWorkspaceApi(Resource):
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# get file from request
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
||||
@@ -6,6 +6,6 @@ bp = Blueprint("service_api", __name__, url_prefix="/v1")
|
||||
api = ExternalApi(bp)
|
||||
|
||||
from . import index
|
||||
from .app import annotation, app, audio, completion, conversation, file, message, site, workflow
|
||||
from .app import annotation, app, audio, completion, conversation, file, file_preview, message, site, workflow
|
||||
from .dataset import dataset, document, hit_testing, metadata, segment, upload_file
|
||||
from .workspace import models
|
||||
|
||||
@@ -2,7 +2,7 @@ import logging
|
||||
|
||||
from flask import request
|
||||
from flask_restful import Resource, reqparse
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
@@ -30,6 +30,7 @@ from libs import helper
|
||||
from libs.helper import uuid_value
|
||||
from models.model import App, AppMode, EndUser
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
|
||||
from services.errors.llm import InvokeRateLimitError
|
||||
|
||||
|
||||
@@ -47,6 +48,9 @@ class CompletionApi(Resource):
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
|
||||
|
||||
args = parser.parse_args()
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
if external_trace_id:
|
||||
args["external_trace_id"] = external_trace_id
|
||||
|
||||
streaming = args["response_mode"] == "streaming"
|
||||
|
||||
@@ -110,7 +114,7 @@ class ChatApi(Resource):
|
||||
parser.add_argument("conversation_id", type=uuid_value, location="json")
|
||||
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
|
||||
parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json")
|
||||
|
||||
parser.add_argument("workflow_id", type=str, required=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
@@ -125,6 +129,12 @@ class ChatApi(Resource):
|
||||
)
|
||||
|
||||
return helper.compact_generate_response(response)
|
||||
except WorkflowNotFoundError as ex:
|
||||
raise NotFound(str(ex))
|
||||
except IsDraftWorkflowError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except WorkflowIdFormatError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.conversation.ConversationCompletedError:
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
import json
|
||||
|
||||
from flask_restful import Resource, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from sqlalchemy.orm import Session
|
||||
from werkzeug.exceptions import NotFound
|
||||
from werkzeug.exceptions import BadRequest, NotFound
|
||||
|
||||
import services
|
||||
from controllers.service_api import api
|
||||
@@ -15,6 +17,7 @@ from fields.conversation_fields import (
|
||||
simple_conversation_fields,
|
||||
)
|
||||
from fields.conversation_variable_fields import (
|
||||
conversation_variable_fields,
|
||||
conversation_variable_infinite_scroll_pagination_fields,
|
||||
)
|
||||
from libs.helper import uuid_value
|
||||
@@ -120,7 +123,41 @@ class ConversationVariablesApi(Resource):
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
|
||||
|
||||
class ConversationVariableDetailApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
|
||||
@marshal_with(conversation_variable_fields)
|
||||
def put(self, app_model: App, end_user: EndUser, c_id, variable_id):
|
||||
"""Update a conversation variable's value"""
|
||||
app_mode = AppMode.value_of(app_model.mode)
|
||||
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
|
||||
raise NotChatAppError()
|
||||
|
||||
conversation_id = str(c_id)
|
||||
variable_id = str(variable_id)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("value", required=True, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
return ConversationService.update_conversation_variable(
|
||||
app_model, conversation_id, variable_id, end_user, json.loads(args["value"])
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.conversation.ConversationVariableNotExistsError:
|
||||
raise NotFound("Conversation Variable Not Exists.")
|
||||
except services.errors.conversation.ConversationVariableTypeMismatchError as e:
|
||||
raise BadRequest(str(e))
|
||||
|
||||
|
||||
api.add_resource(ConversationRenameApi, "/conversations/<uuid:c_id>/name", endpoint="conversation_name")
|
||||
api.add_resource(ConversationApi, "/conversations")
|
||||
api.add_resource(ConversationDetailApi, "/conversations/<uuid:c_id>", endpoint="conversation_detail")
|
||||
api.add_resource(ConversationVariablesApi, "/conversations/<uuid:c_id>/variables", endpoint="conversation_variables")
|
||||
api.add_resource(
|
||||
ConversationVariableDetailApi,
|
||||
"/conversations/<uuid:c_id>/variables/<uuid:variable_id>",
|
||||
endpoint="conversation_variable_detail",
|
||||
methods=["PUT"],
|
||||
)
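A usage sketch for the new PUT route (host, API key, and ids are placeholders). The handler runs json.loads on the submitted value, so the value field is itself a JSON-encoded string; the user field is how validate_app_token resolves the end user from the JSON body:

import requests

resp = requests.put(
    "https://example.com/v1/conversations/<conversation_id>/variables/<variable_id>",
    headers={"Authorization": "Bearer <service-api-key>"},
    json={"value": "42", "user": "end-user-123"},  # json.loads("42") -> 42
)
print(resp.status_code, resp.json())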
|
||||
|
||||
@@ -107,3 +107,15 @@ class UnsupportedFileTypeError(BaseHTTPException):
|
||||
error_code = "unsupported_file_type"
|
||||
description = "File type not allowed."
|
||||
code = 415
|
||||
|
||||
|
||||
class FileNotFoundError(BaseHTTPException):
|
||||
error_code = "file_not_found"
|
||||
description = "The requested file was not found."
|
||||
code = 404
|
||||
|
||||
|
||||
class FileAccessDeniedError(BaseHTTPException):
|
||||
error_code = "file_access_denied"
|
||||
description = "Access to the requested file is denied."
|
||||
code = 403
|
||||
|
||||
@@ -20,18 +20,17 @@ class FileApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model: App, end_user: EndUser):
|
||||
file = request.files["file"]
|
||||
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.mimetype:
|
||||
raise UnsupportedFileTypeError()
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
|
||||
api/controllers/service_api/app/file_preview.py (new file, 186 lines)
@@ -0,0 +1,186 @@
|
||||
import logging
|
||||
from urllib.parse import quote
|
||||
|
||||
from flask import Response
|
||||
from flask_restful import Resource, reqparse
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import (
|
||||
FileAccessDeniedError,
|
||||
FileNotFoundError,
|
||||
)
|
||||
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_storage import storage
|
||||
from models.model import App, EndUser, Message, MessageFile, UploadFile
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FilePreviewApi(Resource):
|
||||
"""
|
||||
Service API File Preview endpoint
|
||||
|
||||
Provides secure file preview/download functionality for external API users.
|
||||
Files can only be accessed if they belong to messages within the requesting app's context.
|
||||
"""
|
||||
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY))
|
||||
def get(self, app_model: App, end_user: EndUser, file_id: str):
|
||||
"""
|
||||
Preview/Download a file that was uploaded via Service API
|
||||
|
||||
Args:
|
||||
app_model: The authenticated app model
|
||||
end_user: The authenticated end user (optional)
|
||||
file_id: UUID of the file to preview
|
||||
|
||||
Query Parameters:
|
||||
user: Optional user identifier
|
||||
as_attachment: Boolean, whether to download as attachment (default: false)
|
||||
|
||||
Returns:
|
||||
Stream response with file content
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: File does not exist
|
||||
FileAccessDeniedError: File access denied (not owned by app)
|
||||
"""
|
||||
file_id = str(file_id)
|
||||
|
||||
# Parse query parameters
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Validate file ownership and get file objects
|
||||
message_file, upload_file = self._validate_file_ownership(file_id, app_model.id)
|
||||
|
||||
# Get file content generator
|
||||
try:
|
||||
generator = storage.load(upload_file.key, stream=True)
|
||||
except Exception as e:
|
||||
raise FileNotFoundError(f"Failed to load file content: {str(e)}")
|
||||
|
||||
# Build response with appropriate headers
|
||||
response = self._build_file_response(generator, upload_file, args["as_attachment"])
|
||||
|
||||
return response
|
||||
|
||||
def _validate_file_ownership(self, file_id: str, app_id: str) -> tuple[MessageFile, UploadFile]:
|
||||
"""
|
||||
Validate that the file belongs to a message within the requesting app's context
|
||||
|
||||
Security validations performed:
|
||||
1. File exists in MessageFile table (was used in a conversation)
|
||||
2. Message belongs to the requesting app
|
||||
3. UploadFile record exists and is accessible
|
||||
4. File tenant matches app tenant (additional security layer)
|
||||
|
||||
Args:
|
||||
file_id: UUID of the file to validate
|
||||
app_id: UUID of the requesting app
|
||||
|
||||
Returns:
|
||||
Tuple of (MessageFile, UploadFile) if validation passes
|
||||
|
||||
Raises:
|
||||
FileNotFoundError: File or related records not found
|
||||
FileAccessDeniedError: File does not belong to the app's context
|
||||
"""
|
||||
try:
|
||||
# Input validation
|
||||
if not file_id or not app_id:
|
||||
raise FileAccessDeniedError("Invalid file or app identifier")
|
||||
|
||||
# First, find the MessageFile that references this upload file
|
||||
message_file = db.session.query(MessageFile).where(MessageFile.upload_file_id == file_id).first()
|
||||
|
||||
if not message_file:
|
||||
raise FileNotFoundError("File not found in message context")
|
||||
|
||||
# Get the message and verify it belongs to the requesting app
|
||||
message = (
|
||||
db.session.query(Message).where(Message.id == message_file.message_id, Message.app_id == app_id).first()
|
||||
)
|
||||
|
||||
if not message:
|
||||
raise FileAccessDeniedError("File access denied: not owned by requesting app")
|
||||
|
||||
# Get the actual upload file record
|
||||
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
|
||||
|
||||
if not upload_file:
|
||||
raise FileNotFoundError("Upload file record not found")
|
||||
|
||||
# Additional security: verify tenant isolation
|
||||
app = db.session.query(App).where(App.id == app_id).first()
|
||||
if app and upload_file.tenant_id != app.tenant_id:
|
||||
raise FileAccessDeniedError("File access denied: tenant mismatch")
|
||||
|
||||
return message_file, upload_file
|
||||
|
||||
except (FileNotFoundError, FileAccessDeniedError):
|
||||
# Re-raise our custom exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
# Log unexpected errors for debugging
|
||||
logger.exception(
|
||||
"Unexpected error during file ownership validation",
|
||||
extra={"file_id": file_id, "app_id": app_id, "error": str(e)},
|
||||
)
|
||||
raise FileAccessDeniedError("File access validation failed")
|
||||
|
||||
def _build_file_response(self, generator, upload_file: UploadFile, as_attachment: bool = False) -> Response:
|
||||
"""
|
||||
Build Flask Response object with appropriate headers for file streaming
|
||||
|
||||
Args:
|
||||
generator: File content generator from storage
|
||||
upload_file: UploadFile database record
|
||||
as_attachment: Whether to set Content-Disposition as attachment
|
||||
|
||||
Returns:
|
||||
Flask Response object with streaming file content
|
||||
"""
|
||||
response = Response(
|
||||
generator,
|
||||
mimetype=upload_file.mime_type,
|
||||
direct_passthrough=True,
|
||||
headers={},
|
||||
)
|
||||
|
||||
# Add Content-Length if known
|
||||
if upload_file.size and upload_file.size > 0:
|
||||
response.headers["Content-Length"] = str(upload_file.size)
|
||||
|
||||
# Add Accept-Ranges header for audio/video files to support seeking
|
||||
if upload_file.mime_type in [
|
||||
"audio/mpeg",
|
||||
"audio/wav",
|
||||
"audio/mp4",
|
||||
"audio/ogg",
|
||||
"audio/flac",
|
||||
"audio/aac",
|
||||
"video/mp4",
|
||||
"video/webm",
|
||||
"video/quicktime",
|
||||
"audio/x-m4a",
|
||||
]:
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
|
||||
# Set Content-Disposition for downloads
|
||||
if as_attachment and upload_file.name:
|
||||
encoded_filename = quote(upload_file.name)
|
||||
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
|
||||
# Override content-type for downloads to force download
|
||||
response.headers["Content-Type"] = "application/octet-stream"
|
||||
|
||||
# Add caching headers for performance
|
||||
response.headers["Cache-Control"] = "public, max-age=3600" # Cache for 1 hour
|
||||
|
||||
return response
# Register the API endpoint
|
||||
api.add_resource(FilePreviewApi, "/files/<uuid:file_id>/preview")
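A hedged download sketch against the new preview route (host, key, and file id are placeholders); as_attachment=true switches Content-Disposition to attachment and forces application/octet-stream, as implemented above:

import requests

resp = requests.get(
    "https://example.com/v1/files/<file_id>/preview",
    headers={"Authorization": "Bearer <service-api-key>"},
    params={"user": "end-user-123", "as_attachment": "true"},
    stream=True,
)
with open("preview_download.bin", "wb") as f:
    for chunk in resp.iter_content(chunk_size=8192):
        f.write(chunk)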
|
||||
@@ -15,7 +15,11 @@ from fields.message_fields import agent_thought_fields, feedback_fields
|
||||
from fields.raws import FilesContainedField
|
||||
from libs.helper import TimestampField, uuid_value
|
||||
from models.model import App, AppMode, EndUser
|
||||
from services.errors.message import SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -65,7 +69,7 @@ class MessageListApi(Resource):
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -87,7 +91,7 @@ class MessageFeedbackApi(Resource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
@@ -117,7 +121,7 @@ class MessageSuggestedApi(Resource):
|
||||
questions = MessageService.get_suggested_questions_after_answer(
|
||||
app_model=app_model, user=end_user, message_id=message_id, invoke_from=InvokeFrom.SERVICE_API
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
except SuggestedQuestionsAfterAnswerDisabledError:
|
||||
raise BadRequest("Suggested Questions Is Disabled.")
|
||||
|
||||
@@ -5,7 +5,7 @@ from flask import request
|
||||
from flask_restful import Resource, fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from werkzeug.exceptions import InternalServerError
|
||||
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
|
||||
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.app.error import (
|
||||
@@ -34,6 +34,7 @@ from libs.helper import TimestampField
|
||||
from models.model import App, AppMode, EndUser
|
||||
from repositories.factory import DifyAPIRepositoryFactory
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
|
||||
from services.errors.llm import InvokeRateLimitError
|
||||
from services.workflow_app_service import WorkflowAppService
|
||||
|
||||
@@ -120,6 +121,59 @@ class WorkflowRunApi(Resource):
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class WorkflowRunByIdApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
|
||||
def post(self, app_model: App, end_user: EndUser, workflow_id: str):
|
||||
"""
|
||||
Run specific workflow by ID
|
||||
"""
|
||||
app_mode = AppMode.value_of(app_model.mode)
|
||||
if app_mode != AppMode.WORKFLOW:
|
||||
raise NotWorkflowAppError()
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
|
||||
parser.add_argument("files", type=list, required=False, location="json")
|
||||
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Add workflow_id to args for AppGenerateService
|
||||
args["workflow_id"] = workflow_id
|
||||
|
||||
external_trace_id = get_external_trace_id(request)
|
||||
if external_trace_id:
|
||||
args["external_trace_id"] = external_trace_id
|
||||
streaming = args.get("response_mode") == "streaming"
|
||||
|
||||
try:
|
||||
response = AppGenerateService.generate(
|
||||
app_model=app_model, user=end_user, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=streaming
|
||||
)
|
||||
|
||||
return helper.compact_generate_response(response)
|
||||
except WorkflowNotFoundError as ex:
|
||||
raise NotFound(str(ex))
|
||||
except IsDraftWorkflowError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except WorkflowIdFormatError as ex:
|
||||
raise BadRequest(str(ex))
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
except QuotaExceededError:
|
||||
raise ProviderQuotaExceededError()
|
||||
except ModelCurrentlyNotSupportError:
|
||||
raise ProviderModelCurrentlyNotSupportError()
|
||||
except InvokeRateLimitError as ex:
|
||||
raise InvokeRateLimitHttpError(ex.description)
|
||||
except InvokeError as e:
|
||||
raise CompletionRequestError(e.description)
|
||||
except ValueError as e:
|
||||
raise e
|
||||
except Exception:
|
||||
logging.exception("internal server error.")
|
||||
raise InternalServerError()
|
||||
|
||||
|
||||
class WorkflowTaskStopApi(Resource):
|
||||
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
|
||||
def post(self, app_model: App, end_user: EndUser, task_id: str):
|
||||
@@ -193,5 +247,6 @@ class WorkflowAppLogApi(Resource):
|
||||
|
||||
api.add_resource(WorkflowRunApi, "/workflows/run")
|
||||
api.add_resource(WorkflowRunDetailApi, "/workflows/run/<string:workflow_run_id>")
|
||||
api.add_resource(WorkflowRunByIdApi, "/workflows/<string:workflow_id>/run")
|
||||
api.add_resource(WorkflowTaskStopApi, "/workflows/tasks/<string:task_id>/stop")
|
||||
api.add_resource(WorkflowAppLogApi, "/workflows/logs")
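A sketch of invoking the new run-by-id route (host, key, ids, and the inputs payload are placeholders); inputs and user are required, and omitting response_mode falls back to blocking since only the literal "streaming" enables streaming:

import requests

resp = requests.post(
    "https://example.com/v1/workflows/<workflow_id>/run",
    headers={"Authorization": "Bearer <service-api-key>"},
    json={"inputs": {"query": "hello"}, "response_mode": "blocking", "user": "end-user-123"},
)
print(resp.json())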
|
||||
|
||||
@@ -29,7 +29,7 @@ def _validate_name(name):
|
||||
|
||||
|
||||
def _validate_description_length(description):
|
||||
if len(description) > 400:
|
||||
if description and len(description) > 400:
|
||||
raise ValueError("Description cannot exceed 400 characters.")
|
||||
return description
|
||||
|
||||
@@ -87,7 +87,7 @@ class DatasetListApi(DatasetApiResource):
|
||||
)
|
||||
parser.add_argument(
|
||||
"description",
|
||||
type=str,
|
||||
type=_validate_description_length,
|
||||
nullable=True,
|
||||
required=False,
|
||||
default="",
|
||||
|
||||
@@ -234,8 +234,6 @@ class DocumentAddByFileApi(DatasetApiResource):
|
||||
args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
|
||||
)
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
# check file
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
@@ -243,6 +241,8 @@ class DocumentAddByFileApi(DatasetApiResource):
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
@@ -358,39 +358,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
|
||||
return documents_and_batch_fields, 200
|
||||
|
||||
|
||||
class DocumentDeleteApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def delete(self, tenant_id, dataset_id, document_id):
|
||||
"""Delete document."""
|
||||
document_id = str(document_id)
|
||||
dataset_id = str(dataset_id)
|
||||
tenant_id = str(tenant_id)
|
||||
|
||||
# get dataset info
|
||||
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
|
||||
# 404 if document not found
|
||||
if document is None:
|
||||
raise NotFound("Document Not Exists.")
|
||||
|
||||
# 403 if document is archived
|
||||
if DocumentService.check_archived(document):
|
||||
raise ArchivedDocumentImmutableError()
|
||||
|
||||
try:
|
||||
# delete document
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return 204
|
||||
|
||||
|
||||
class DocumentListApi(DatasetApiResource):
|
||||
def get(self, tenant_id, dataset_id):
|
||||
dataset_id = str(dataset_id)
|
||||
@@ -473,7 +440,7 @@ class DocumentIndexingStatusApi(DatasetApiResource):
|
||||
return data
|
||||
|
||||
|
||||
class DocumentDetailApi(DatasetApiResource):
|
||||
class DocumentApi(DatasetApiResource):
|
||||
METADATA_CHOICES = {"all", "only", "without"}
|
||||
|
||||
def get(self, tenant_id, dataset_id, document_id):
|
||||
@@ -567,6 +534,37 @@ class DocumentDetailApi(DatasetApiResource):
|
||||
|
||||
return response
|
||||
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def delete(self, tenant_id, dataset_id, document_id):
|
||||
"""Delete document."""
|
||||
document_id = str(document_id)
|
||||
dataset_id = str(dataset_id)
|
||||
tenant_id = str(tenant_id)
|
||||
|
||||
# get dataset info
|
||||
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
|
||||
# 404 if document not found
|
||||
if document is None:
|
||||
raise NotFound("Document Not Exists.")
|
||||
|
||||
# 403 if document is archived
|
||||
if DocumentService.check_archived(document):
|
||||
raise ArchivedDocumentImmutableError()
|
||||
|
||||
try:
|
||||
# delete document
|
||||
DocumentService.delete_document(document)
|
||||
except services.errors.document.DocumentIndexingError:
|
||||
raise DocumentIndexingError("Cannot delete document during indexing.")
|
||||
|
||||
return 204
|
||||
|
||||
|
||||
api.add_resource(
|
||||
DocumentAddByTextApi,
|
||||
@@ -588,7 +586,6 @@ api.add_resource(
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
|
||||
)
|
||||
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
|
||||
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
|
||||
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
|
||||
|
||||
@@ -17,8 +17,8 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def post(self, tenant_id, dataset_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataArgs(**args)
|
||||
|
||||
@@ -43,7 +43,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def patch(self, tenant_id, dataset_id, metadata_id):
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
|
||||
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
dataset_id_str = str(dataset_id)
|
||||
@@ -101,7 +101,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
parser = reqparse.RequestParser()
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
|
||||
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
metadata_args = MetadataOperationData(**args)
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from flask import request
|
||||
from flask_restful import Resource, marshal_with, reqparse
|
||||
from werkzeug.exceptions import Unauthorized
|
||||
|
||||
from controllers.common import fields
|
||||
from controllers.web import api
|
||||
@@ -75,14 +76,14 @@ class AppWebAuthPermission(Resource):
|
||||
try:
|
||||
auth_header = request.headers.get("Authorization")
|
||||
if auth_header is None:
|
||||
raise
|
||||
raise Unauthorized("Authorization header is missing.")
|
||||
if " " not in auth_header:
|
||||
raise
|
||||
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
|
||||
|
||||
auth_scheme, tk = auth_header.split(None, 1)
|
||||
auth_scheme = auth_scheme.lower()
|
||||
if auth_scheme != "bearer":
|
||||
raise
|
||||
raise Unauthorized("Authorization scheme must be 'Bearer'")
|
||||
|
||||
decoded = PassportService().verify(tk)
|
||||
user_id = decoded.get("user_id", "visitor")
|
||||
|
||||
@@ -12,18 +12,17 @@ from services.file_service import FileService
|
||||
class FileApi(WebApiResource):
|
||||
@marshal_with(file_fields)
|
||||
def post(self, app_model, end_user):
|
||||
file = request.files["file"]
|
||||
source = request.form.get("source")
|
||||
|
||||
if "file" not in request.files:
|
||||
raise NoFileUploadedError()
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
file = request.files["file"]
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
source = request.form.get("source")
|
||||
if source not in ("datasets", None):
|
||||
source = None
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@ from flask_restful import fields, marshal_with, reqparse
|
||||
from flask_restful.inputs import int_range
|
||||
from werkzeug.exceptions import InternalServerError, NotFound
|
||||
|
||||
import services
|
||||
from controllers.web import api
|
||||
from controllers.web.error import (
|
||||
AppMoreLikeThisDisabledError,
|
||||
@@ -29,7 +28,11 @@ from models.model import AppMode
|
||||
from services.app_generate_service import AppGenerateService
|
||||
from services.errors.app import MoreLikeThisDisabledError
|
||||
from services.errors.conversation import ConversationNotExistsError
|
||||
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
|
||||
from services.errors.message import (
|
||||
FirstMessageNotExistsError,
|
||||
MessageNotExistsError,
|
||||
SuggestedQuestionsAfterAnswerDisabledError,
|
||||
)
|
||||
from services.message_service import MessageService
|
||||
|
||||
|
||||
@@ -73,9 +76,9 @@ class MessageListApi(WebApiResource):
|
||||
return MessageService.pagination_by_first_id(
|
||||
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
|
||||
)
|
||||
except services.errors.conversation.ConversationNotExistsError:
|
||||
except ConversationNotExistsError:
|
||||
raise NotFound("Conversation Not Exists.")
|
||||
except services.errors.message.FirstMessageNotExistsError:
|
||||
except FirstMessageNotExistsError:
|
||||
raise NotFound("First Message Not Exists.")
|
||||
|
||||
|
||||
@@ -96,7 +99,7 @@ class MessageFeedbackApi(WebApiResource):
|
||||
rating=args.get("rating"),
|
||||
content=args.get("content"),
|
||||
)
|
||||
except services.errors.message.MessageNotExistsError:
|
||||
except MessageNotExistsError:
|
||||
raise NotFound("Message Not Exists.")
|
||||
|
||||
return {"result": "success"}
|
||||
|
||||
@@ -148,6 +148,8 @@ SupportedComparisonOperator = Literal[
|
||||
"is not",
|
||||
"empty",
|
||||
"not empty",
|
||||
"in",
|
||||
"not in",
|
||||
# for number
|
||||
"=",
|
||||
"≠",
|
||||
|
||||
@@ -118,26 +118,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
|
||||
):
|
||||
return
|
||||
|
||||
# Init conversation variables
|
||||
stmt = select(ConversationVariable).where(
|
||||
ConversationVariable.app_id == self.conversation.app_id,
|
||||
ConversationVariable.conversation_id == self.conversation.id,
|
||||
)
|
||||
with Session(db.engine) as session:
|
||||
db_conversation_variables = session.scalars(stmt).all()
|
||||
if not db_conversation_variables:
|
||||
# Create conversation variables if they don't exist.
|
||||
db_conversation_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
|
||||
)
|
||||
for variable in self._workflow.conversation_variables
|
||||
]
|
||||
session.add_all(db_conversation_variables)
|
||||
# Convert database entities to variables.
|
||||
conversation_variables = [item.to_variable() for item in db_conversation_variables]
|
||||
|
||||
session.commit()
|
||||
# Initialize conversation variables
|
||||
conversation_variables = self._initialize_conversation_variables()
|
||||
|
||||
# Create a variable pool.
|
||||
system_inputs = SystemVariable(
|
||||
@@ -292,3 +274,100 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
|
||||
message_id=message_id,
|
||||
trace_manager=app_generate_entity.trace_manager,
|
||||
)
|
||||
|
||||
def _initialize_conversation_variables(self) -> list[VariableUnion]:
|
||||
"""
|
||||
Initialize conversation variables for the current conversation.
|
||||
|
||||
This method:
|
||||
1. Loads existing variables from the database
|
||||
2. Creates new variables if none exist
|
||||
3. Syncs missing variables from the workflow definition
|
||||
|
||||
:return: List of conversation variables ready for use
|
||||
"""
|
||||
with Session(db.engine) as session:
|
||||
existing_variables = self._load_existing_conversation_variables(session)
|
||||
|
||||
if not existing_variables:
|
||||
# First time initialization - create all variables
|
||||
existing_variables = self._create_all_conversation_variables(session)
|
||||
else:
|
||||
# Check and add any missing variables from the workflow
|
||||
existing_variables = self._sync_missing_conversation_variables(session, existing_variables)
|
||||
|
||||
# Convert to Variable objects for use in the workflow
|
||||
conversation_variables = [var.to_variable() for var in existing_variables]
|
||||
|
||||
session.commit()
|
||||
return cast(list[VariableUnion], conversation_variables)
|
||||
|
||||
def _load_existing_conversation_variables(self, session: Session) -> list[ConversationVariable]:
|
||||
"""
|
||||
Load existing conversation variables from the database.
|
||||
|
||||
:param session: Database session
|
||||
:return: List of existing conversation variables
|
||||
"""
|
||||
stmt = select(ConversationVariable).where(
|
||||
ConversationVariable.app_id == self.conversation.app_id,
|
||||
ConversationVariable.conversation_id == self.conversation.id,
|
||||
)
|
||||
return list(session.scalars(stmt).all())
|
||||
|
||||
def _create_all_conversation_variables(self, session: Session) -> list[ConversationVariable]:
|
||||
"""
|
||||
Create all conversation variables for a new conversation.
|
||||
|
||||
:param session: Database session
|
||||
:return: List of created conversation variables
|
||||
"""
|
||||
new_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
|
||||
)
|
||||
for variable in self._workflow.conversation_variables
|
||||
]
|
||||
|
||||
if new_variables:
|
||||
session.add_all(new_variables)
|
||||
|
||||
return new_variables
|
||||
|
||||
def _sync_missing_conversation_variables(
|
||||
self, session: Session, existing_variables: list[ConversationVariable]
|
||||
) -> list[ConversationVariable]:
|
||||
"""
|
||||
Sync missing conversation variables from the workflow definition.
|
||||
|
||||
This handles the case where new variables are added to a workflow
|
||||
after conversations have already been created.
|
||||
|
||||
:param session: Database session
|
||||
:param existing_variables: List of existing conversation variables
|
||||
:return: Updated list including any newly created variables
|
||||
"""
|
||||
# Get IDs of existing and workflow variables
|
||||
existing_ids = {var.id for var in existing_variables}
|
||||
workflow_variables = {var.id: var for var in self._workflow.conversation_variables}
|
||||
|
||||
# Find missing variable IDs
|
||||
missing_ids = set(workflow_variables.keys()) - existing_ids
|
||||
|
||||
if not missing_ids:
|
||||
return existing_variables
|
||||
|
||||
# Create missing variables with their default values
|
||||
new_variables = [
|
||||
ConversationVariable.from_variable(
|
||||
app_id=self.conversation.app_id,
|
||||
conversation_id=self.conversation.id,
|
||||
variable=workflow_variables[var_id],
|
||||
)
|
||||
for var_id in missing_ids
|
||||
]
|
||||
|
||||
session.add_all(new_variables)
|
||||
|
||||
# Return combined list
|
||||
return existing_variables + new_variables
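A toy illustration of the set difference that drives the sync step above (variable ids and definitions are made up):

existing_ids = {"var-a"}
workflow_variables = {"var-a": "definition-a", "var-b": "definition-b"}  # id -> variable definition
missing_ids = set(workflow_variables.keys()) - existing_ids
assert missing_ids == {"var-b"}  # only var-b gets persisted with its default value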
|
||||
|
||||
@@ -9,7 +9,6 @@ from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAp
|
||||
from core.entities.provider_configuration import ProviderModelBundle
|
||||
from core.file import File, FileUploadConfig
|
||||
from core.model_runtime.entities.model_entities import AIModelEntity
|
||||
from core.ops.ops_trace_manager import TraceQueueManager
|
||||
|
||||
|
||||
class InvokeFrom(Enum):
|
||||
@@ -114,7 +113,8 @@ class AppGenerateEntity(BaseModel):
|
||||
extras: dict[str, Any] = Field(default_factory=dict)
|
||||
|
||||
# tracing instance
|
||||
trace_manager: Optional[TraceQueueManager] = None
|
||||
# Using Any to avoid circular import with TraceQueueManager
|
||||
trace_manager: Optional[Any] = None
|
||||
|
||||
|
||||
class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
|
||||
|
||||
@@ -23,6 +23,7 @@ from core.app.entities.task_entities import (
MessageFileStreamResponse,
MessageReplaceStreamResponse,
MessageStreamResponse,
StreamEvent,
WorkflowTaskState,
)
from core.llm_generator.llm_generator import LLMGenerator

@@ -180,11 +181,15 @@ class MessageCycleManager:
:param message_id: message id
:return:
"""
message_file = db.session.query(MessageFile).filter(MessageFile.id == message_id).first()
event_type = StreamEvent.MESSAGE_FILE if message_file else StreamEvent.MESSAGE

return MessageStreamResponse(
task_id=self._application_generate_entity.task_id,
id=message_id,
answer=answer,
from_variable_selector=from_variable_selector,
event=event_type,
)

def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:
@@ -843,7 +843,7 @@ class ProviderConfiguration(BaseModel):
continue

status = ModelStatus.ACTIVE
if m.model in model_setting_map:
if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
model_setting = model_setting_map[m.model_type][m.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED
@@ -176,7 +176,7 @@ class ProviderConfig(BasicProviderConfig):

scope: AppSelectorScope | ModelSelectorScope | ToolSelectorScope | None = None
required: bool = False
default: Optional[Union[int, str]] = None
default: Optional[Union[int, str, float, bool]] = None
options: Optional[list[Option]] = None
label: Optional[I18nObject] = None
help: Optional[I18nObject] = None
@@ -32,7 +32,7 @@ def get_attr(*, file: File, attr: FileAttribute):
case FileAttribute.TRANSFER_METHOD:
return file.transfer_method.value
case FileAttribute.URL:
return file.remote_url
return _to_url(file)
case FileAttribute.EXTENSION:
return file.extension
case FileAttribute.RELATED_ID:
@@ -121,9 +121,8 @@ class TokenBufferMemory:
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

if curr_message_tokens > max_token_limit:
pruned_memory = []
while curr_message_tokens > max_token_limit and len(prompt_messages) > 1:
pruned_memory.append(prompt_messages.pop(0))
prompt_messages.pop(0)
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)

return prompt_messages
@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session, sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
TraceClient,
convert_datetime_to_nanoseconds,
convert_string_to_id,
convert_to_span_id,
convert_to_trace_id,
generate_span_id,

@@ -101,8 +102,9 @@ class AliyunDataTrace(BaseTraceInstance):
raise ValueError(f"Aliyun get run url failed: {str(e)}")

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or convert_to_trace_id(trace_info.workflow_run_id)
trace_id = convert_to_trace_id(trace_info.workflow_run_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
workflow_span_id = convert_to_span_id(trace_info.workflow_run_id, "workflow")
self.add_workflow_span(trace_id, workflow_span_id, trace_info)

@@ -130,6 +132,9 @@ class AliyunDataTrace(BaseTraceInstance):
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

message_span_id = convert_to_span_id(message_id, "message")
message_span = SpanData(
trace_id=trace_id,

@@ -186,9 +191,13 @@ class AliyunDataTrace(BaseTraceInstance):
return
message_id = trace_info.message_id

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

documents_data = extract_retrieval_documents(trace_info.documents)
dataset_retrieval_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name="dataset_retrieval",

@@ -214,8 +223,12 @@ class AliyunDataTrace(BaseTraceInstance):
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

tool_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name=trace_info.tool_name,

@@ -451,8 +464,13 @@ class AliyunDataTrace(BaseTraceInstance):
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)

trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)

suggested_question_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=convert_to_span_id(message_id, "suggested_question"),
name="suggested_question",
@@ -181,15 +181,21 @@ def convert_to_trace_id(uuid_v4: Optional[str]) -> int:
raise ValueError(f"Invalid UUID input: {e}")


def convert_string_to_id(string: Optional[str]) -> int:
if not string:
return generate_span_id()
hash_bytes = hashlib.sha256(string.encode("utf-8")).digest()
id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return id


def convert_to_span_id(uuid_v4: Optional[str], span_type: str) -> int:
try:
uuid_obj = uuid.UUID(uuid_v4)
except Exception as e:
raise ValueError(f"Invalid UUID input: {e}")
combined_key = f"{uuid_obj.hex}-{span_type}"
hash_bytes = hashlib.sha256(combined_key.encode("utf-8")).digest()
span_id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return span_id
return convert_string_to_id(combined_key)


def convert_datetime_to_nanoseconds(start_time_a: Optional[datetime]) -> Optional[int]:
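The two helpers above share one scheme: hash a string with SHA-256 and keep the first 8 bytes as an unsigned integer, so the same UUID-plus-suffix always maps to the same span ID. A minimal standalone sketch of that scheme (illustrative only, not the `traceclient` module itself):

```python
import hashlib

def stable_64bit_id(value: str) -> int:
    """Hash a string with SHA-256 and keep the first 8 bytes as an unsigned 64-bit integer."""
    digest = hashlib.sha256(value.encode("utf-8")).digest()
    return int.from_bytes(digest[:8], byteorder="big", signed=False)

# The same input always yields the same ID, so a span key such as
# "<workflow-run-uuid>-workflow" is reproducible across processes and retries.
print(hex(stable_64bit_id("8f14e45f-ceea-467f-a0f7-6c2e7a9f3b11-workflow")))
```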
@@ -4,6 +4,7 @@ import logging
import os
from datetime import datetime, timedelta
from typing import Any, Optional, Union, cast
from urllib.parse import urlparse

from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes
from opentelemetry import trace

@@ -40,8 +41,14 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
try:
# Choose the appropriate exporter based on config type
exporter: Union[GrpcOTLPSpanExporter, HttpOTLPSpanExporter]

# Inspect the provided endpoint to determine its structure
parsed = urlparse(arize_phoenix_config.endpoint)
base_endpoint = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path.rstrip("/")

if isinstance(arize_phoenix_config, ArizeConfig):
arize_endpoint = f"{arize_phoenix_config.endpoint}/v1"
arize_endpoint = f"{base_endpoint}/v1"
arize_headers = {
"api_key": arize_phoenix_config.api_key or "",
"space_id": arize_phoenix_config.space_id or "",

@@ -53,7 +60,7 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
timeout=30,
)
else:
phoenix_endpoint = f"{arize_phoenix_config.endpoint}/v1/traces"
phoenix_endpoint = f"{base_endpoint}{path}/v1/traces"
phoenix_headers = {
"api_key": arize_phoenix_config.api_key or "",
"authorization": f"Bearer {arize_phoenix_config.api_key or ''}",

@@ -91,16 +98,21 @@ def datetime_to_nanos(dt: Optional[datetime]) -> int:
return int(dt.timestamp() * 1_000_000_000)


def uuid_to_trace_id(string: Optional[str]) -> int:
"""Convert UUID string to a valid trace ID (16-byte integer)."""
def string_to_trace_id128(string: Optional[str]) -> int:
"""
Convert any input string into a stable 128-bit integer trace ID.

This uses SHA-256 hashing and takes the first 16 bytes (128 bits) of the digest.
It's suitable for generating consistent, unique identifiers from strings.
"""
if string is None:
string = ""
hash_object = hashlib.sha256(string.encode())

# Take the first 16 bytes (128 bits) of the hash
# Take the first 16 bytes (128 bits) of the hash digest
digest = hash_object.digest()[:16]

# Convert to integer (128 bits)
# Convert to a 128-bit integer
return int.from_bytes(digest, byteorder="big")
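For the 128-bit variant documented above, the only differences are the digest width (16 bytes) and the empty-string fallback for `None`. A hedged sketch of the same mapping:

```python
import hashlib
from typing import Optional

def trace_id_128_sketch(value: Optional[str]) -> int:
    """Illustrative: SHA-256 the input (empty string if None) and keep the first 16 bytes."""
    digest = hashlib.sha256((value or "").encode()).digest()[:16]
    return int.from_bytes(digest, byteorder="big")

assert 0 <= trace_id_128_sketch("external-run-42") < 2**128
assert trace_id_128_sketch(None) == trace_id_128_sketch("")  # None falls back to the empty string
```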
@@ -153,8 +165,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
workflow_metadata.update(trace_info.metadata)

external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or uuid_to_trace_id(trace_info.workflow_run_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.workflow_run_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -310,7 +321,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
SpanAttributes.SESSION_ID: trace_info.message_data.conversation_id,
}

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.message_id)
message_span_id = RandomIdGenerator().generate_span_id()
span_context = SpanContext(
trace_id=trace_id,

@@ -406,7 +417,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -468,7 +479,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -521,7 +532,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -568,7 +579,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
"tool_config": json.dumps(trace_info.tool_config, ensure_ascii=False),
}

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
tool_span_id = RandomIdGenerator().generate_span_id()
logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id)

@@ -629,7 +640,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)

trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

@@ -87,7 +87,7 @@ class PhoenixConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
return cls.validate_endpoint_url(v, "https://app.phoenix.arize.com")
return validate_url_with_path(v, "https://app.phoenix.arize.com")


class LangfuseConfig(BaseTracingConfig):
@@ -14,6 +14,7 @@ class BaseTraceInfo(BaseModel):
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
metadata: dict[str, Any]
trace_id: Optional[str] = None

@field_validator("inputs", "outputs")
@classmethod
@@ -67,14 +67,13 @@ class LangFuseDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.workflow_run_id
user_id = trace_info.metadata.get("user_id")
metadata = trace_info.metadata
metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id

if trace_info.message_id:
trace_id = external_trace_id or trace_info.message_id
trace_id = trace_info.trace_id or trace_info.message_id
name = TraceTaskName.MESSAGE_TRACE.value
trace_data = LangfuseTrace(
id=trace_id,

@@ -250,8 +249,10 @@ class LangFuseDataTrace(BaseTraceInstance):
user_id = end_user_data.session_id
metadata["user_id"] = user_id

trace_id = trace_info.trace_id or message_id

trace_data = LangfuseTrace(
id=message_id,
id=trace_id,
user_id=user_id,
name=TraceTaskName.MESSAGE_TRACE.value,
input={

@@ -285,7 +286,7 @@ class LangFuseDataTrace(BaseTraceInstance):

langfuse_generation_data = LangfuseGeneration(
name="llm",
trace_id=message_id,
trace_id=trace_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
model=message_data.model_id,

@@ -311,7 +312,7 @@ class LangFuseDataTrace(BaseTraceInstance):
"preset_response": trace_info.preset_response,
"inputs": trace_info.inputs,
},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.created_at,
metadata=trace_info.metadata,

@@ -334,7 +335,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
input=trace_info.inputs,
output=str(trace_info.suggested_question),
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,

@@ -352,7 +353,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
input=trace_info.inputs,
output={"documents": trace_info.documents},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.updated_at,
metadata=trace_info.metadata,

@@ -365,7 +366,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=trace_info.tool_name,
input=trace_info.tool_inputs,
output=trace_info.tool_outputs,
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,
@@ -65,8 +65,7 @@ class LangSmithDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()
message_dotted_order = (

@@ -290,7 +289,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
parent_run_id=None,
)

@@ -319,7 +318,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
id=str(uuid.uuid4()),
)

@@ -351,7 +350,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -381,7 +380,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -410,7 +409,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

@@ -440,7 +439,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error=trace_info.error or "",
)

@@ -465,7 +464,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],
@@ -96,8 +96,7 @@ class OpikDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
dify_trace_id = external_trace_id or trace_info.workflow_run_id
dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
workflow_metadata = wrap_metadata(
trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id

@@ -105,7 +104,7 @@ class OpikDataTrace(BaseTraceInstance):
root_span_id = None

if trace_info.message_id:
dify_trace_id = external_trace_id or trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)

trace_data = {

@@ -276,7 +275,7 @@ class OpikDataTrace(BaseTraceInstance):
return

metadata = trace_info.metadata
message_id = trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id

user_id = message_data.from_account_id
metadata["user_id"] = user_id

@@ -291,7 +290,7 @@ class OpikDataTrace(BaseTraceInstance):
metadata["end_user_id"] = end_user_id

trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, message_id),
"id": prepare_opik_uuid(trace_info.start_time, dify_trace_id),
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,

@@ -330,7 +329,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.MODERATION_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -356,7 +355,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -376,7 +375,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at

span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
"type": "tool",
"start_time": start_time,

@@ -391,7 +390,7 @@ class OpikDataTrace(BaseTraceInstance):

def tool_trace(self, trace_info: ToolTraceInfo):
span_data = {
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": trace_info.tool_name,
"type": "tool",
"start_time": trace_info.start_time,

@@ -406,7 +405,7 @@ class OpikDataTrace(BaseTraceInstance):

def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.GENERATE_NAME_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
@@ -322,7 +322,7 @@ class OpsTraceManager:
:return:
"""
# auth check
if enabled == True:
if enabled:
try:
provider_config_map[tracing_provider]
except KeyError:

@@ -422,8 +422,11 @@ class TraceTask:
self.timer = timer
self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001")
self.app_id = None

self.trace_id = None
self.kwargs = kwargs
external_trace_id = kwargs.get("external_trace_id")
if external_trace_id:
self.trace_id = external_trace_id

def execute(self):
return self.preprocess()
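The `TraceTask` change above stores an optional `external_trace_id` kwarg on `self.trace_id`, and the hunks that follow pass it into every `*TraceInfo` as `trace_id=self.trace_id`. A simplified sketch of the resulting precedence (the class and names here are illustrative, not the real `TraceTask`):

```python
from typing import Optional

class TraceTaskSketch:
    """Illustrative only: an externally supplied trace id wins over a derived one."""

    def __init__(self, workflow_run_id: str, **kwargs):
        self.workflow_run_id = workflow_run_id
        # Mirrors the hunk above: stays None unless the caller passed external_trace_id.
        self.trace_id: Optional[str] = kwargs.get("external_trace_id")

    def effective_trace_id(self) -> str:
        # Exporters fall back to an id derived from the run when trace_id is unset.
        return self.trace_id or f"derived-from-{self.workflow_run_id}"

print(TraceTaskSketch("run-1").effective_trace_id())                              # derived-from-run-1
print(TraceTaskSketch("run-1", external_trace_id="abc123").effective_trace_id())  # abc123
```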
@@ -520,11 +523,8 @@ class TraceTask:
"app_id": workflow_run.app_id,
}

external_trace_id = self.kwargs.get("external_trace_id")
if external_trace_id:
metadata["external_trace_id"] = external_trace_id

workflow_trace_info = WorkflowTraceInfo(
trace_id=self.trace_id,
workflow_data=workflow_run.to_dict(),
conversation_id=conversation_id,
workflow_id=workflow_id,

@@ -584,6 +584,7 @@ class TraceTask:
message_tokens = message_data.message_tokens

message_trace_info = MessageTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
conversation_model=conversation_mode,

@@ -627,6 +628,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None

moderation_trace_info = ModerationTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
inputs=inputs,
message_data=message_data.to_dict(),

@@ -667,6 +669,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None

suggested_question_trace_info = SuggestedQuestionTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
message_data=message_data.to_dict(),
inputs=message_data.message,

@@ -708,6 +711,7 @@ class TraceTask:
}

dataset_retrieval_trace_info = DatasetRetrievalTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
inputs=message_data.query or message_data.inputs,
documents=[doc.model_dump() for doc in documents] if documents else [],

@@ -772,6 +776,7 @@ class TraceTask:
)

tool_trace_info = ToolTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
tool_name=tool_name,

@@ -807,6 +812,7 @@ class TraceTask:
}

generate_name_trace_info = GenerateNameTraceInfo(
trace_id=self.trace_id,
conversation_id=conversation_id,
inputs=inputs,
outputs=generate_conversation_name,
@@ -87,8 +87,7 @@ class WeaveDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)

def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()

@@ -245,8 +244,12 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time
attributes["tags"] = ["message", str(trace_info.conversation_mode)]

trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id

message_run = WeaveTraceModel(
id=message_id,
id=trace_id,
op=str(TraceTaskName.MESSAGE_TRACE.value),
input_tokens=trace_info.message_tokens,
output_tokens=trace_info.answer_tokens,

@@ -274,7 +277,7 @@ class WeaveDataTrace(BaseTraceInstance):
)
self.start_call(
llm_run,
parent_run_id=message_id,
parent_run_id=trace_id,
)
self.finish_call(llm_run)
self.finish_call(message_run)

@@ -289,6 +292,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time or trace_info.message_data.created_at
attributes["end_time"] = trace_info.end_time or trace_info.message_data.updated_at

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

moderation_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.MODERATION_TRACE.value),

@@ -303,7 +309,7 @@ class WeaveDataTrace(BaseTraceInstance):
exception=getattr(trace_info, "error", None),
file_list=[],
)
self.start_call(moderation_run, parent_run_id=trace_info.message_id)
self.start_call(moderation_run, parent_run_id=trace_id)
self.finish_call(moderation_run)

def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):

@@ -316,6 +322,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or message_data.updated_at,)

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

suggested_question_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.SUGGESTED_QUESTION_TRACE.value),

@@ -326,7 +335,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)

self.start_call(suggested_question_run, parent_run_id=trace_info.message_id)
self.start_call(suggested_question_run, parent_run_id=trace_id)
self.finish_call(suggested_question_run)

def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):

@@ -338,6 +347,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or trace_info.message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or trace_info.message_data.updated_at,)

trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id

dataset_retrieval_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.DATASET_RETRIEVAL_TRACE.value),

@@ -348,7 +360,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)

self.start_call(dataset_retrieval_run, parent_run_id=trace_info.message_id)
self.start_call(dataset_retrieval_run, parent_run_id=trace_id)
self.finish_call(dataset_retrieval_run)

def tool_trace(self, trace_info: ToolTraceInfo):

@@ -357,6 +369,11 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time

message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id

tool_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=trace_info.tool_name,

@@ -366,9 +383,7 @@ class WeaveDataTrace(BaseTraceInstance):
attributes=attributes,
exception=trace_info.error,
)
message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
self.start_call(tool_run, parent_run_id=message_id)
self.start_call(tool_run, parent_run_id=trace_id)
self.finish_call(tool_run)

def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
@@ -208,6 +208,7 @@ class BasePluginClient:
except Exception:
raise PluginDaemonInnerError(code=rep.code, message=rep.message)

logger.error("Error in stream reponse for plugin %s", rep.__dict__)
self._handle_plugin_daemon_error(error.error_type, error.message)
raise ValueError(f"plugin daemon: {rep.message}, code: {rep.code}")
if rep.data is None:
@@ -2,6 +2,8 @@ from collections.abc import Mapping

from pydantic import TypeAdapter

from extensions.ext_logging import get_request_id


class PluginDaemonError(Exception):
"""Base class for all plugin daemon errors."""

@@ -11,7 +13,7 @@ class PluginDaemonError(Exception):

def __str__(self) -> str:
# returns the class name and description
return f"{self.__class__.__name__}: {self.description}"
return f"req_id: {get_request_id()} {self.__class__.__name__}: {self.description}"


class PluginDaemonInternalError(PluginDaemonError):
api/core/rag/datasource/vdb/clickzetta/README.md (new file, 190 lines)
@@ -0,0 +1,190 @@
# Clickzetta Vector Database Integration

This module provides integration with Clickzetta Lakehouse as a vector database for Dify.

## Features

- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
- **Vector Search**: Efficient similarity search using HNSW algorithm
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
- **Hybrid Search**: Combine vector similarity and full-text search for better results
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments

## Configuration

### Required Environment Variables

All seven configuration parameters are required:

```bash
# Authentication
CLICKZETTA_USERNAME=your_username
CLICKZETTA_PASSWORD=your_password

# Instance configuration
CLICKZETTA_INSTANCE=your_instance_id
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=your_workspace
CLICKZETTA_VCLUSTER=your_vcluster
CLICKZETTA_SCHEMA=your_schema
```

### Optional Configuration

```bash
# Batch processing
CLICKZETTA_BATCH_SIZE=100

# Full-text search configuration
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese  # Options: keyword, english, chinese, unicode
CLICKZETTA_ANALYZER_MODE=smart    # Options: max_word, smart

# Vector search configuration
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance  # Options: l2_distance, cosine_distance
```

## Usage

### 1. Set Clickzetta as the Vector Store

In your Dify configuration, set:

```bash
VECTOR_STORE=clickzetta
```

### 2. Table Structure

Clickzetta will automatically create tables with the following structure:

```sql
CREATE TABLE <collection_name> (
    id STRING NOT NULL,
    content STRING NOT NULL,
    metadata JSON,
    vector VECTOR(FLOAT, <dimension>) NOT NULL,
    PRIMARY KEY (id)
);

-- Vector index for similarity search
CREATE VECTOR INDEX idx_<collection_name>_vec
ON TABLE <schema>.<collection_name>(vector)
PROPERTIES (
    "distance.function" = "cosine_distance",
    "scalar.type" = "f32"
);

-- Inverted index for full-text search (if enabled)
CREATE INVERTED INDEX idx_<collection_name>_text
ON <schema>.<collection_name>(content)
PROPERTIES (
    "analyzer" = "chinese",
    "mode" = "smart"
);
```

## Full-Text Search Capabilities

Clickzetta supports advanced full-text search with multiple analyzers:

### Analyzer Types

1. **keyword**: No tokenization, treats the entire string as a single token
   - Best for: Exact matching, IDs, codes

2. **english**: Designed for English text
   - Features: Recognizes ASCII letters and numbers, converts to lowercase
   - Best for: English content

3. **chinese**: Chinese text tokenizer
   - Features: Recognizes Chinese and English characters, removes punctuation
   - Best for: Chinese or mixed Chinese-English content

4. **unicode**: Multi-language tokenizer based on Unicode
   - Features: Recognizes text boundaries in multiple languages
   - Best for: Multi-language content

### Analyzer Modes

- **max_word**: Fine-grained tokenization (more tokens)
- **smart**: Intelligent tokenization (balanced)
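To see how the two modes differ on a concrete phrase, you can compare `TOKENIZE` output under both settings. A hedged sketch in Python; `get_connection()` is a placeholder for however you obtain a Clickzetta DB-API connection, and only the `TOKENIZE` call shown in the Troubleshooting section below is assumed:

```python
SAMPLE_TEXT = "云器Lakehouse向量检索"

def compare_analyzer_modes(conn, text: str = SAMPLE_TEXT) -> None:
    """Print the token list produced by the chinese analyzer in max_word vs. smart mode."""
    with conn.cursor() as cur:
        for mode in ("max_word", "smart"):
            # Illustrative SQL; quoting/escaping is simplified for readability.
            cur.execute(f"SELECT TOKENIZE('{text}', map('analyzer', 'chinese', 'mode', '{mode}'))")
            print(mode, cur.fetchone()[0])

# Usage (placeholder connection helper): compare_analyzer_modes(get_connection())
```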
### Full-Text Search Functions

- `MATCH_ALL(column, query)`: All terms must be present
- `MATCH_ANY(column, query)`: At least one term must be present
- `MATCH_PHRASE(column, query)`: Exact phrase matching
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
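A hedged example of using these predicates from Python. The schema, table, and connection objects are placeholders; only the `MATCH_*` functions listed above are assumed to exist:

```python
# Illustrative query: rows containing every term of one phrase and at least one term of another.
FULL_TEXT_SQL = """
SELECT id, content
FROM my_schema.my_collection
WHERE MATCH_ALL(content, 'vector database')
  AND MATCH_ANY(content, 'dify rag retrieval')
LIMIT 10
"""

def run_full_text_query(conn):
    """Execute the sample predicate query on a DB-API style connection (placeholder)."""
    with conn.cursor() as cur:
        cur.execute(FULL_TEXT_SQL)
        return cur.fetchall()
```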
## Performance Optimization

### Vector Search

1. **Adjust exploration factor** for accuracy vs speed trade-off:
   ```sql
   SET cz.vector.index.search.ef=64;
   ```

2. **Use appropriate distance functions**:
   - `cosine_distance`: Best for normalized embeddings (e.g., from language models)
   - `l2_distance`: Best for raw feature vectors

### Full-Text Search

1. **Choose the right analyzer**:
   - Use `keyword` for exact matching
   - Use language-specific analyzers for better tokenization

2. **Combine with vector search**:
   - Pre-filter with full-text search for better performance
   - Use hybrid search for improved relevance
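One way to apply the pre-filtering advice above is to restrict candidates with a full-text predicate, cap them with a vector-distance threshold, and rank on the client. This is a sketch under stated assumptions: the vector-literal syntax, table names, and connection helper are placeholders, and it deliberately avoids assuming `ORDER BY` support on the distance expression (see Limitations):

```python
# Illustrative hybrid retrieval: full-text pre-filter plus a distance threshold.
HYBRID_SQL_TEMPLATE = """
SELECT id, content, cosine_distance(vector, {query_vector}) AS dist
FROM my_schema.my_collection
WHERE MATCH_ANY(content, '{keywords}')
  AND cosine_distance(vector, {query_vector}) < {threshold}
LIMIT 50
"""

def hybrid_search(conn, query_vector_literal: str, keywords: str, threshold: float = 0.35):
    """Run the pre-filtered query, then rank the candidates by distance on the client side."""
    sql = HYBRID_SQL_TEMPLATE.format(
        query_vector=query_vector_literal, keywords=keywords, threshold=threshold
    )
    with conn.cursor() as cur:
        cur.execute(sql)
        rows = cur.fetchall()
    return sorted(rows, key=lambda row: row[2])
```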
## Troubleshooting

### Connection Issues

1. Verify all 7 required configuration parameters are set
2. Check network connectivity to Clickzetta service
3. Ensure the user has proper permissions on the schema

### Search Performance

1. Verify vector index exists:
   ```sql
   SHOW INDEX FROM <schema>.<table_name>;
   ```

2. Check if vector index is being used:
   ```sql
   EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
   ```
   Look for `vector_index_search_type` in the execution plan.

### Full-Text Search Not Working

1. Verify inverted index is created
2. Check analyzer configuration matches your content language
3. Use `TOKENIZE()` function to test tokenization:
   ```sql
   SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
   ```

## Limitations

1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
2. Full-text search relevance scores are not provided by Clickzetta
3. Inverted index creation may fail for very large existing tables (the integration continues without raising an error)
4. Index naming constraints:
   - Index names must be unique within a schema
   - Only one vector index can be created per column
   - The implementation uses timestamps to ensure unique index names
5. A column can only have one vector index at a time

## References

- [Clickzetta Vector Search Documentation](https://yunqi.tech/documents/vector-search)
- [Clickzetta Inverted Index Documentation](https://yunqi.tech/documents/inverted-index)
- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference)
api/core/rag/datasource/vdb/clickzetta/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# Clickzetta Vector Database Integration for Dify

api/core/rag/datasource/vdb/clickzetta/clickzetta_vector.py (new file, 1086 lines)
File diff suppressed because it is too large.
@@ -7,6 +7,7 @@ from urllib.parse import urlparse
import requests
from elasticsearch import Elasticsearch
from flask import current_app
from packaging.version import parse as parse_version
from pydantic import BaseModel, model_validator

from core.rag.datasource.vdb.field import Field

@@ -22,22 +23,50 @@ logger = logging.getLogger(__name__)


class ElasticSearchConfig(BaseModel):
host: str
port: int
username: str
password: str
# Regular Elasticsearch config
host: Optional[str] = None
port: Optional[int] = None
username: Optional[str] = None
password: Optional[str] = None

# Elastic Cloud specific config
cloud_url: Optional[str] = None  # Cloud URL for Elasticsearch Cloud
api_key: Optional[str] = None

# Common config
use_cloud: bool = False
ca_certs: Optional[str] = None
verify_certs: bool = False
request_timeout: int = 100000
retry_on_timeout: bool = True
max_retries: int = 10000

@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
if not values["host"]:
raise ValueError("config HOST is required")
if not values["port"]:
raise ValueError("config PORT is required")
if not values["username"]:
raise ValueError("config USERNAME is required")
if not values["password"]:
raise ValueError("config PASSWORD is required")
use_cloud = values.get("use_cloud", False)
cloud_url = values.get("cloud_url")

if use_cloud:
# Cloud configuration validation - requires cloud_url and api_key
if not cloud_url:
raise ValueError("cloud_url is required for Elastic Cloud")

api_key = values.get("api_key")
if not api_key:
raise ValueError("api_key is required for Elastic Cloud")

else:
# Regular Elasticsearch validation
if not values.get("host"):
raise ValueError("config HOST is required for regular Elasticsearch")
if not values.get("port"):
raise ValueError("config PORT is required for regular Elasticsearch")
if not values.get("username"):
raise ValueError("config USERNAME is required for regular Elasticsearch")
if not values.get("password"):
raise ValueError("config PASSWORD is required for regular Elasticsearch")

return values


@@ -50,21 +79,69 @@ class ElasticSearchVector(BaseVector):
self._attributes = attributes

def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
"""
Initialize Elasticsearch client for both regular Elasticsearch and Elastic Cloud.
"""
try:
parsed_url = urlparse(config.host)
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
# Check if using Elastic Cloud
client_config: dict[str, Any]
if config.use_cloud and config.cloud_url:
client_config = {
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
"verify_certs": config.verify_certs,
}

# Parse cloud URL and configure hosts
parsed_url = urlparse(config.cloud_url)
host = f"{parsed_url.scheme}://{parsed_url.hostname}"
if parsed_url.port:
host += f":{parsed_url.port}"

client_config["hosts"] = [host]

# API key authentication for cloud
client_config["api_key"] = config.api_key

# SSL settings
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs

else:
hosts = f"http://{config.host}:{config.port}"
client = Elasticsearch(
hosts=hosts,
basic_auth=(config.username, config.password),
request_timeout=100000,
retry_on_timeout=True,
max_retries=10000,
)
except requests.exceptions.ConnectionError:
raise ConnectionError("Vector database connection error")
# Regular Elasticsearch configuration
parsed_url = urlparse(config.host or "")
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
use_https = parsed_url.scheme == "https"
else:
hosts = f"http://{config.host}:{config.port}"
use_https = False

client_config = {
"hosts": [hosts],
"basic_auth": (config.username, config.password),
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
}

# Only add SSL settings if using HTTPS
if use_https:
client_config["verify_certs"] = config.verify_certs
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs

client = Elasticsearch(**client_config)

# Test connection
if not client.ping():
raise ConnectionError("Failed to connect to Elasticsearch")

except requests.exceptions.ConnectionError as e:
raise ConnectionError(f"Vector database connection error: {str(e)}")
except Exception as e:
raise ConnectionError(f"Elasticsearch client initialization failed: {str(e)}")

return client

@@ -73,7 +150,7 @@ class ElasticSearchVector(BaseVector):
return cast(str, info["version"]["number"])

def _check_version(self):
if self._version < "8.0.0":
if parse_version(self._version) < parse_version("8.0.0"):
raise ValueError("Elasticsearch vector database version must be greater than 8.0.0")

def get_type(self) -> str:

@@ -209,7 +286,11 @@ class ElasticSearchVector(BaseVector):
},
}
}

self._client.indices.create(index=self._collection_name, mappings=mappings)
logger.info("Created index %s with dimension %s", self._collection_name, dim)
else:
logger.info("Collection %s already exists.", self._collection_name)

redis_client.set(collection_exist_cache_key, 1, ex=3600)

@@ -225,13 +306,51 @@ class ElasticSearchVectorFactory(AbstractVectorFactory):
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.ELASTICSEARCH, collection_name))

config = current_app.config

# Check if ELASTICSEARCH_USE_CLOUD is explicitly set to false (boolean)
use_cloud_env = config.get("ELASTICSEARCH_USE_CLOUD", False)

if use_cloud_env is False:
# Use regular Elasticsearch with config values
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "elasticsearch"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", "elastic"),
}
else:
# Check for cloud configuration
cloud_url = config.get("ELASTICSEARCH_CLOUD_URL")
if cloud_url:
config_dict = {
"use_cloud": True,
"cloud_url": cloud_url,
"api_key": config.get("ELASTICSEARCH_API_KEY"),
}
else:
# Fallback to regular Elasticsearch
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "localhost"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", ""),
}

# Common configuration
config_dict.update(
{
"ca_certs": str(config.get("ELASTICSEARCH_CA_CERTS")) if config.get("ELASTICSEARCH_CA_CERTS") else None,
"verify_certs": bool(config.get("ELASTICSEARCH_VERIFY_CERTS", False)),
"request_timeout": int(config.get("ELASTICSEARCH_REQUEST_TIMEOUT", 100000)),
"retry_on_timeout": bool(config.get("ELASTICSEARCH_RETRY_ON_TIMEOUT", True)),
"max_retries": int(config.get("ELASTICSEARCH_MAX_RETRIES", 10000)),
}
)

return ElasticSearchVector(
index_name=collection_name,
config=ElasticSearchConfig(
host=config.get("ELASTICSEARCH_HOST", "localhost"),
port=config.get("ELASTICSEARCH_PORT", 9200),
username=config.get("ELASTICSEARCH_USERNAME", ""),
password=config.get("ELASTICSEARCH_PASSWORD", ""),
),
config=ElasticSearchConfig(**config_dict),
attributes=[],
)
@@ -1,5 +1,6 @@
import json
import logging
import math
from typing import Any, Optional

import tablestore  # type: ignore

@@ -22,6 +23,7 @@ class TableStoreConfig(BaseModel):
access_key_secret: Optional[str] = None
instance_name: Optional[str] = None
endpoint: Optional[str] = None
normalize_full_text_bm25_score: Optional[bool] = False

@model_validator(mode="before")
@classmethod

@@ -47,6 +49,7 @@ class TableStoreVector(BaseVector):
config.access_key_secret,
config.instance_name,
)
self._normalize_full_text_bm25_score = config.normalize_full_text_bm25_score
self._table_name = f"{collection_name}"
self._index_name = f"{collection_name}_idx"
self._tags_field = f"{Field.METADATA_KEY.value}_tags"

@@ -131,8 +134,8 @@ class TableStoreVector(BaseVector):
filtered_list = None
if document_ids_filter:
filtered_list = ["document_id=" + item for item in document_ids_filter]

return self._search_by_full_text(query, filtered_list, top_k)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._search_by_full_text(query, filtered_list, top_k, score_threshold)

def delete(self) -> None:
self._delete_table_if_exist()

@@ -318,7 +321,19 @@ class TableStoreVector(BaseVector):
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents

def _search_by_full_text(self, query: str, document_ids_filter: list[str] | None, top_k: int) -> list[Document]:
@staticmethod
def _normalize_score_exp_decay(score: float, k: float = 0.15) -> float:
"""
Args:
score: BM25 search score.
k: decay factor, the larger the k, the steeper the low score end
"""
normalized_score = 1 - math.exp(-k * score)
return max(0.0, min(1.0, normalized_score))

def _search_by_full_text(
self, query: str, document_ids_filter: list[str] | None, top_k: int, score_threshold: float
) -> list[Document]:
bool_query = tablestore.BoolQuery(must_queries=[], filter_queries=[], should_queries=[], must_not_queries=[])
bool_query.must_queries.append(tablestore.MatchQuery(text=query, field_name=Field.CONTENT_KEY.value))
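The decay comment above is easier to read with numbers. A small standalone sketch of the same `1 - exp(-k * score)` mapping (not the `TableStoreVector` method itself):

```python
import math

def normalize_bm25(score: float, k: float = 0.15) -> float:
    """Map an unbounded BM25 score into [0, 1]; a larger k saturates low scores faster."""
    return max(0.0, min(1.0, 1 - math.exp(-k * score)))

for raw in (1, 5, 10, 20):
    print(raw, round(normalize_bm25(raw), 3))  # 0.139, 0.528, 0.777, 0.95
```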
@@ -339,15 +354,27 @@ class TableStoreVector(BaseVector):

documents = []
for search_hit in search_response.search_hits:
score = None
if self._normalize_full_text_bm25_score:
score = self._normalize_score_exp_decay(search_hit.score)

# skip when score is below threshold and use normalize score
if score and score <= score_threshold:
continue

ots_column_map = {}
for col in search_hit.row[1]:
ots_column_map[col[0]] = col[1]

vector_str = ots_column_map.get(Field.VECTOR.value)
metadata_str = ots_column_map.get(Field.METADATA_KEY.value)
vector = json.loads(vector_str) if vector_str else None
metadata = json.loads(metadata_str) if metadata_str else {}

vector_str = ots_column_map.get(Field.VECTOR.value)
vector = json.loads(vector_str) if vector_str else None

if score:
metadata["score"] = score

documents.append(
Document(
page_content=ots_column_map.get(Field.CONTENT_KEY.value) or "",

@@ -355,6 +382,8 @@ class TableStoreVector(BaseVector):
metadata=metadata,
)
)
if self._normalize_full_text_bm25_score:
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents


@@ -375,5 +404,6 @@ class TableStoreVectorFactory(AbstractVectorFactory):
instance_name=dify_config.TABLESTORE_INSTANCE_NAME,
access_key_id=dify_config.TABLESTORE_ACCESS_KEY_ID,
access_key_secret=dify_config.TABLESTORE_ACCESS_KEY_SECRET,
normalize_full_text_bm25_score=dify_config.TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE,
),
)
@@ -246,6 +246,10 @@ class TencentVector(BaseVector):
return self._get_search_res(res, score_threshold)

def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
document_ids_filter = kwargs.get("document_ids_filter")
filter = None
if document_ids_filter:
filter = Filter(Filter.In("metadata.document_id", document_ids_filter))
if not self._enable_hybrid_search:
return []
res = self._client.hybrid_search(

@@ -269,6 +273,7 @@ class TencentVector(BaseVector):
),
retrieve_vector=False,
limit=kwargs.get("top_k", 4),
filter=filter,
)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._get_search_res(res, score_threshold)

@@ -172,6 +172,10 @@ class Vector:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory

return MatrixoneVectorFactory
case VectorType.CLICKZETTA:
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory

return ClickzettaVectorFactory
case _:
raise ValueError(f"Vector store {vector_type} is not supported.")


@@ -30,3 +30,4 @@ class VectorType(StrEnum):
TABLESTORE = "tablestore"
HUAWEI_CLOUD = "huawei_cloud"
MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta"
@@ -13,6 +13,8 @@ SupportedComparisonOperator = Literal[
"is not",
"empty",
"not empty",
"in",
"not in",
# for number
"=",
"≠",
@@ -1,5 +1,6 @@
import json
import logging
import operator
from typing import Any, Optional, cast

import requests

@@ -130,13 +131,15 @@ class NotionExtractor(BaseExtractor):
data[property_name] = value
row_dict = {k: v for k, v in data.items() if v}
row_content = ""
for key, value in row_dict.items():
for key, value in sorted(row_dict.items(), key=operator.itemgetter(0)):
if isinstance(value, dict):
value_dict = {k: v for k, v in value.items() if v}
value_content = "".join(f"{k}:{v} " for k, v in value_dict.items())
row_content = row_content + f"{key}:{value_content}\n"
else:
row_content = row_content + f"{key}:{value}\n"
if "url" in result:
row_content = row_content + f"Row Page URL:{result.get('url', '')}\n"
database_content.append(row_content)

has_more = response_data.get("has_more", False)
@@ -62,7 +62,7 @@ class WordExtractor(BaseExtractor):

def extract(self) -> list[Document]:
"""Load given path as single page."""
content = self.parse_docx(self.file_path, "storage")
content = self.parse_docx(self.file_path)
return [
Document(
page_content=content,

@@ -189,23 +189,8 @@ class WordExtractor(BaseExtractor):
paragraph_content.append(run.text)
return "".join(paragraph_content).strip()

def _parse_paragraph(self, paragraph, image_map):
paragraph_content = []
for run in paragraph.runs:
if run.element.xpath(".//a:blip"):
for blip in run.element.xpath(".//a:blip"):
embed_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed")
if embed_id:
rel_target = run.part.rels[embed_id].target_ref
if rel_target in image_map:
paragraph_content.append(image_map[rel_target])
if run.text.strip():
paragraph_content.append(run.text.strip())
return " ".join(paragraph_content) if paragraph_content else ""

def parse_docx(self, docx_path, image_folder):
def parse_docx(self, docx_path):
doc = DocxDocument(docx_path)
os.makedirs(image_folder, exist_ok=True)

content = []
@@ -5,14 +5,13 @@ from __future__ import annotations
from typing import Any, Optional

from core.model_manager import ModelInstance
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer import GPT2Tokenizer
from core.rag.splitter.text_splitter import (
TS,
Collection,
Literal,
RecursiveCharacterTextSplitter,
Set,
TokenTextSplitter,
Union,
)

@@ -45,14 +44,6 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):

return [len(text) for text in texts]

if issubclass(cls, TokenTextSplitter):
extra_kwargs = {
"model_name": embedding_model_instance.model if embedding_model_instance else "gpt2",
"allowed_special": allowed_special,
"disallowed_special": disallowed_special,
}
kwargs = {**kwargs, **extra_kwargs}

return cls(length_function=_character_encoder, **kwargs)
@@ -20,9 +20,6 @@ class Tool(ABC):
The base class of a tool
"""

entity: ToolEntity
runtime: ToolRuntime

def __init__(self, entity: ToolEntity, runtime: ToolRuntime) -> None:
self.entity = entity
self.runtime = runtime
@@ -37,12 +37,12 @@ class LocaltimeToTimestampTool(BuiltinTool):
@staticmethod
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
try:
if local_tz is None:
local_tz = datetime.now().astimezone().tzinfo
if isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
local_time = datetime.strptime(localtime, time_format)
localtime = local_tz.localize(local_time)  # type: ignore
if local_tz is None:
localtime = local_time.astimezone()  # type: ignore
elif isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
localtime = local_tz.localize(local_time)  # type: ignore
timestamp = int(localtime.timestamp())  # type: ignore
return timestamp
except Exception as e:
@@ -27,7 +27,7 @@ class TimezoneConversionTool(BuiltinTool):
target_time = self.timezone_convert(current_time, current_timezone, target_timezone)  # type: ignore
if not target_time:
yield self.create_text_message(
f"Invalid datatime and timezone: {current_time},{current_timezone},{target_timezone}"
f"Invalid datetime and timezone: {current_time},{current_timezone},{target_timezone}"
)
return
@@ -20,8 +20,6 @@ class BuiltinTool(Tool):
:param meta: the meta data of a tool call processing
"""

provider: str

def __init__(self, provider: str, **kwargs):
super().__init__(**kwargs)
self.provider = provider