Compare commits

...

190 Commits

Author SHA1 Message Date
Wu Tianwei
0baccb9e82 chore(version): bump version to 1.7.2 (#23740) 2025-08-11 17:12:44 +08:00
lyzno1
2c81db5a1c feat: enhance GotoAnything UX with @ command selector (#23738) 2025-08-11 15:47:19 +08:00
-LAN-
43411d7a9e chore: remove debug log statements from DifyAPIRepositoryFactory (#23734) 2025-08-11 15:39:20 +08:00
-LAN-
2dbf20a3e9 fix: resolve circular import in AppGenerateEntity (#23731) 2025-08-11 15:38:28 +08:00
-LAN-
aaf9fc1562 fix: add @property decorator to pydantic computed_field for compatibility (#23728) 2025-08-11 15:34:19 +08:00
Wu Tianwei
d30f898274 fix: model selector language undefined error (#23723) 2025-08-11 14:39:22 +08:00
NeatGuyCoding
4a72fa6268 fix: Enhance doc_form null checking, exception handling, and rollback logic (#23713) 2025-08-11 13:53:40 +08:00
lyzno1
0c5e66bccb fix: unified error handling for GotoAnything search actions (#23715) 2025-08-11 11:57:06 +08:00
HyaCinth
ff791efe18 fix: Optimize the event handling for inserting variable shortcuts, resolving incorrect blur issues (#22981) (#23707) 2025-08-11 11:08:12 +08:00
NeatGuyCoding
6083b1d618 Feat add testcontainers test for message service (#23703)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2025-08-11 10:49:32 +08:00
github-actions[bot]
69c3439c3a chore: translate i18n files (#23704)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2025-08-11 10:35:29 +08:00
crazywoola
7ee170f0a7 Feat node search (#23685)
Co-authored-by: GuanMu <ballmanjq@gmail.com>
Co-authored-by: zhujiruo <zhujiruo@foxmail.com>
Co-authored-by: Matri Qi <matrixdom@126.com>
Co-authored-by: croatialu <wuli.croatia@foxmail.com>
Co-authored-by: HyaCinth <88471803+HyaCiovo@users.noreply.github.com>
Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com>
2025-08-11 10:19:52 +08:00
Yongtao Huang
36b221b170 Fix model_setting_map key mismatch (#23699)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-11 09:33:26 +08:00
Guangdong Liu
d1fc98200c fix: update HTTP request timeout fields to use Field with default values (#23694) 2025-08-11 09:30:16 +08:00
lyzno1
bb852ef6d2 fix: improve dark mode UI consistency in signin page (#23684) 2025-08-10 17:21:05 +08:00
lyzno1
a17b7b3d89 fix: correct File Preview API position in Japanese advanced chat template (#23645) 2025-08-10 11:17:38 +08:00
github-actions[bot]
dc65a72d93 chore: translate i18n files (#23679)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2025-08-10 11:17:14 +08:00
Matri Qi
ea502d36a9 lint: fix sonarjs/no-dead-store issues (#23653) 2025-08-10 11:16:43 +08:00
Guangdong Liu
79a3c1618f fix: support custom file types in workflow Start node (#23672)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-10 11:09:47 +08:00
baonudesifeizhai
0be3b4e7a6 fix: Add internationalization support for date input formatting Fixes #23266 (#23678) 2025-08-10 11:05:55 +08:00
NeatGuyCoding
5f8967918e Feat add testcontainers test for app dsl service (#23675) 2025-08-10 11:03:46 +08:00
-LAN-
6900b08134 fix: sync missing conversation variables for existing conversations (#23649) 2025-08-09 22:42:18 +08:00
lyzno1
dc641348f6 fix: resolve datasets container rounded corners disappearing during scroll (#23667) 2025-08-09 22:41:42 +08:00
Yongtao Huang
431e0105de Fix bare raise in if blocks (#23671)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-09 22:40:55 +08:00
-LAN-
cbe0d9d053 fix: conversation pinned filter returns incorrect results when no conversations are pinned (#23670) 2025-08-09 22:40:28 +08:00
lyzno1
f9abcfd789 fix: change i18n workflow trigger mechanism to run in main repository (#23662) 2025-08-09 16:31:24 +08:00
Ganondorf
5a0a2b7e44 Allow to export full screen image of workflow (#23655) 2025-08-09 15:31:32 +08:00
NeatGuyCoding
41345199d8 Feat add testcontainers test for api base extendsion service (#23652) 2025-08-09 12:57:24 +08:00
lyzno1
8362365eae Fix file type misclassification in logs interface (#23641) 2025-08-08 22:58:52 +08:00
yunqiqiliang
14e1c16cf2 Fix ClickZetta stability and reduce logging noise (#23632)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-08 22:57:47 +08:00
Joel
b32b7712e2 fix: i18n options too long breaks plugin setting ui (#23640) 2025-08-08 18:34:12 +08:00
GuanMu
5cf55fcbab feat: Enhance the alignment logic of the nodes in the context menu (#23617) 2025-08-08 15:20:24 +08:00
lyzno1
57c4fc6bf8 Fix X button animation glitches in secret key modals (#23614) 2025-08-08 14:47:15 +08:00
lyzno1
92fcf0854b fix: eliminate AppInfo sidebar animation glitches and layout jumps (#23609) 2025-08-08 12:55:52 +08:00
yunqiqiliang
f73ec60311 Bug Fix: Fix workflow knowledge retrieval cache bug (#23597)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-08-08 11:42:20 +08:00
crazywoola
1c60b7f070 Revert "feat: improved MCP timeout" (#23602) 2025-08-08 11:20:53 +08:00
znn
084dcd1a50 using message_file event instead of message event (#22988) 2025-08-08 09:50:20 +08:00
湛露先生
fd536a943a word extractor cleans. (#20926)
Signed-off-by: zhanluxianshen <zhanluxianshen@163.com>
2025-08-08 09:37:51 +08:00
jiangbo721
6f80fb72cb feat: tools json response support datetime uuid etc (#22738)
Co-authored-by: 刘江波 <jiangbo721@163.com>
2025-08-08 09:26:50 +08:00
Matri Qi
cb5e2ad9b2 lint: fix tailwind lint issues (#23367)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2025-08-08 09:25:41 +08:00
yunqiqiliang
62772e8871 fix: ensure vector database cleanup on dataset deletion regardless of document presence (affects all 33 vector databases) (#23574)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-08 09:18:43 +08:00
Will
4b0480c8b3 feat: improved MCP timeout (#23546) 2025-08-08 09:08:14 +08:00
Yongtao Huang
c8c591d73c Fix incorrect exception handling in db query (#23582)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-08 09:07:59 +08:00
lyzno1
2edd32fdea fix: resolve AppCard description overlap with tag area (#23585)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-08 09:05:55 +08:00
NeatGuyCoding
5889059ce4 Feat add testcontainers test for annnotation service (#23593) 2025-08-08 09:03:50 +08:00
-LAN-
7230497bf4 fix: empty arrays should convert to empty string in LLM prompts (#23590) 2025-08-08 08:50:37 +08:00
lyzno1
d98071a088 feat: add Service API file preview endpoint (#23534)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-07 21:44:29 +08:00
BotAndyGao
ac02c12e49 fix: resolve Windows path separator issue in Huawei OBS operations (#23475)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-07 21:17:46 +08:00
NFish
11d29e8d3e fix: update invite settings page style in dark mode (#23571) 2025-08-07 21:01:01 +08:00
NFish
b44ecf9bf7 fix: hide opensource license link when custom branding is enabled (#23569) 2025-08-07 05:37:05 -07:00
NeatGuyCoding
e60f148824 minor fix translation (#23568) 2025-08-07 05:36:52 -07:00
Qiang Lee
e9045a8838 Fix: Apply Metadata Filters Correctly in Full-Text Search Mode for Tencent Cloud Vector Database (#23564) 2025-08-07 05:36:06 -07:00
-LAN-
55487ba0c6 fix: exclude dev dependencies from production Docker image (#23562) 2025-08-07 05:35:32 -07:00
crazywoola
305ea0a2d5 Fix/footer behavior (#23555) 2025-08-07 16:55:23 +08:00
thief
a5ca76befb Fixes #23536 (#23542) 2025-08-07 14:42:34 +08:00
yunqiqiliang
e01510e2a6 feat: Add Clickzetta Lakehouse vector database integration (#22551)
Co-authored-by: Claude <noreply@anthropic.com>
2025-08-07 14:21:46 +08:00
HyaCinth
2931c891a7 chore: Optimize component styles and interactions (#23250) (#23543) 2025-08-07 14:19:38 +08:00
goofy
ad1b1193fa fix localtime_to_timestamp tool throws 'no attribute localize error' when it executes without specifying a timezone parameter (#23517) 2025-08-07 11:14:45 +08:00
crazywoola
85f33fb73d chore: add template for required fields (#23533) 2025-08-07 10:55:41 +08:00
hangboss1761
f3c98a274b fix: update the guiding text in the upload component (#23509) 2025-08-07 10:06:04 +08:00
lyzno1
f6c7175828 fix: make TagSelector always visible for accessibility and mobile support (#23515) 2025-08-07 09:25:26 +08:00
NeatGuyCoding
d253ca192a Feat add testcontainers test for app service (#23523) 2025-08-07 09:13:30 +08:00
Yongtao Huang
e072b7dafa Chore: remove unused variable pruned_memory (#23514) 2025-08-07 09:06:17 +08:00
Guangdong Liu
3ff52f1809 feat: Enhance response validation and parsing in tool.py (#23456)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-07 09:04:51 +08:00
ghmark675
ad61b42494 fix(node): fix unexpected extra equals sign in HTTP params (#23474) 2025-08-07 09:04:09 +08:00
lyzno1
c95761f4e6 fix: resolve i18n workflow permissions and naming issues (#23494) 2025-08-06 15:59:26 +08:00
lyzno1
1f15cba9a0 Enhance API documentation TOC with modern design and improved UX (#23490)
Co-authored-by: crazywoola <427733928@qq.com>
2025-08-06 14:45:06 +08:00
NeatGuyCoding
3344aaabb6 minor fix: translation for contentEnableLabel (#23483) 2025-08-06 14:20:32 +08:00
Yongtao Huang
b8ef0c84e6 Fix: moved file = after file presence validation (#23453)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-06 13:44:12 +08:00
Yongtao Huang
6b8b31ff64 Remove unnecessary issubclass check (#23455)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-06 13:43:55 +08:00
lyzno1
823872d294 Fix API documentation layout and dark mode contrast issues (#23462) 2025-08-06 11:19:47 +08:00
GuanMu
724ec12bf3 Feat workflow node align (#23451) 2025-08-06 11:01:10 +08:00
crazywoola
8aac402b24 fix: can not find file (#23472) 2025-08-06 10:39:19 +08:00
Good Wood
eb12fd9461 fix: fix wrong unicodechar regx (#23468) 2025-08-06 09:48:25 +08:00
NeatGuyCoding
ad622cea9e Feat add testcontainers test for workflow draft variable service (#23466) 2025-08-06 09:47:56 +08:00
winsonwhe
2575eaf1d6 Update milvus version to LTS (#23393)
Co-authored-by: crazywoola <427733928@qq.com>
2025-08-05 21:29:38 +08:00
NeatGuyCoding
fc5ed9f316 Feat add testcontainers test for account service (#23380) 2025-08-05 20:15:39 +08:00
rouxiaomin
40a11b6942 feat(api):Enhance the scope of expired data cleanup table in the Dify… (#23414) 2025-08-05 19:57:43 +08:00
lyzno1
84543a591a i18n/sync (#23429) 2025-08-05 19:36:25 +08:00
lyzno1
2cd3fe0dce fix: Multiple UI component improvements and code quality enhancements (#23446) 2025-08-05 19:36:07 +08:00
crazywoola
5eb061466f chore: update tmpl (#23438) 2025-08-05 19:35:30 +08:00
Xiyuan Chen
52050d3dff feat(workflow): add support for release/e-* tag in build-push workflow (#23418) 2025-08-05 16:42:07 +08:00
Xiyuan Chen
904af20023 Feat/webapp opt (#23283) 2025-08-05 16:07:48 +08:00
Minamiyama
4934dbd0e6 feat(workflow): add relations panel to visualize dependencies (#21998)
Co-authored-by: crazywoola <427733928@qq.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
2025-08-05 15:08:23 +08:00
lyzno1
d080bea20b fix: resolve sidebar animation issues and improve app detail page UX (#23407) 2025-08-05 15:06:40 +08:00
lyzno1
607dfc8be7 fix: remove redundant useEffect from TagSelector component (#23406) 2025-08-05 13:15:26 +08:00
heyszt
3b5130b03d add otel instrument for redis and http request (#23371)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-05 11:14:51 +08:00
Matri Qi
75f722a959 lint: fix issue of no-unused-vars (#23375) 2025-08-05 11:12:30 +08:00
Wu Tianwei
b946378b38 fix: installed apps preview language error (#23397) 2025-08-05 11:01:31 +08:00
crazywoola
0cee57acca chore: add Template (#23395) 2025-08-05 10:43:51 +08:00
znn
ab78e12089 enhancing logging (#23332)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-05 10:36:48 +08:00
Will
7fe23a0ca6 remove useless Tool class attributes (#23389) 2025-08-05 10:20:28 +08:00
yyh
d8584dc03a feat: enhance document list navigation and sorting functionality (#23383) 2025-08-05 10:19:47 +08:00
NeatGuyCoding
a724f35672 fix: fetchAppWithTags may return empty when apps is over 100 (#23350) 2025-08-04 20:20:43 +08:00
NeatGuyCoding
60c7663a80 Feat add testcontainers test (#23269) 2025-08-04 19:27:36 +08:00
Tianyi Jing
8041808b53 fix: diplay all helpfields (#23348)
Signed-off-by: jingfelix <jingfelix@outlook.com>
2025-08-04 14:39:54 +08:00
Yongtao Huang
146d870098 Fix: avoid Flask route conflict by merging DocumentDetailApi and DocumentDeleteApi (#23333)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-04 14:37:36 +08:00
yyh
78d2f49e01 feat(i18n): enhance auto-gen script for template literals and add app-debug translations (#23344) 2025-08-04 13:30:11 +08:00
yyh
54c8bd29ee security: Fix XSS vulnerability in authentication check-code pages (#23295) 2025-08-04 12:48:38 +08:00
Yongtao Huang
406c1952b8 Fix version comparison with imported_version (#23326)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-04 10:40:49 +08:00
znn
0ebcee9a6b fixing footer (#22927) 2025-08-04 10:14:57 +08:00
Yongtao Huang
964fa132cb Chore: fix typo, no code change (#23331) 2025-08-03 22:30:28 +08:00
yyh
dedd5f571c fix(ui): temporarily remove TagSelector from app sidebar per design review (#23329) 2025-08-03 22:30:21 +08:00
lyzno1
90373c7165 fix(i18n): correct translation errors across multiple languages (#23328) 2025-08-03 18:31:50 +08:00
znn
d470120a60 retention of data filled on tab switch for tool plugin (#23323) 2025-08-03 18:31:15 +08:00
lyzno1
0c925bd088 feat(ui): unify tag editing in app sidebar and add management entry to TagFilter (#23325) 2025-08-03 18:30:47 +08:00
Yongtao Huang
76d123fe19 Fix segment query tenant bug and variable naming typo (#23321)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-03 18:30:09 +08:00
qiaofenlin
20f0238aab feat: support workflow version specification in workflow and chat APIs (#23188)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-08-03 16:27:12 +08:00
Warren Wong
6d5a7684b4 feat: Add Download Button to UI for Knowledge Resource Source Files (#23320) 2025-08-03 16:26:11 +08:00
znn
7831d44099 fixing chat window api recall fix on changing browser tab or window (#23301) 2025-08-03 10:14:17 +08:00
Yongtao Huang
fbf844efd5 Chore: replace deprecated datetime.utcnow() with naive_utc_now() (#23312)
Signed-off-by: Yongtao Huang <yongtaoh2022@gmail.com>
2025-08-03 10:11:47 +08:00
Matri Qi
99a4bd82b5 chore: fix function name typo (#23306) 2025-08-03 10:09:26 +08:00
Asuka Minato
58608f51da replace db with sa to get typing support (#23240) 2025-08-02 23:54:23 +08:00
NeatGuyCoding
ff9fd0cdb2 fix: fix wrong css class (#23299) 2025-08-02 17:36:15 +08:00
Matri Qi
aac849d4f4 Fix/variable input validation issue (#23300) 2025-08-02 17:35:51 +08:00
Hasta
688d07e9c3 fix the error of unable to retrieve url from file (#13603) 2025-08-02 17:15:55 +08:00
lyzno1
f9b3cd1b68 i18n: enhance check-i18n script with precise filtering and multiline support (#23298) 2025-08-02 12:52:12 +08:00
XiaoXiaoJiangYun
b2c8718f35 Update metadata_service.py (#23272) 2025-08-02 12:15:23 +08:00
XiaoXiaoJiangYun
46ba0a8781 Update metadata_service.py (#23271) 2025-08-02 12:14:43 +08:00
liujin-patsnap
bc18d4d1b9 Fix: Correct file variable handling for custom tools in workflow (#18427) 2025-08-02 03:36:04 +08:00
DazzlingSong
a4b14fc992 fix video and audio extension, keep consistent with the web page. (#23287)
Co-authored-by: wangjialei <wangjialei@xiaomi.com>
2025-08-01 22:59:38 +08:00
Yongtao Huang
be914438a5 Fix: incorrect array element validation in SegmentType (#23289) 2025-08-01 22:46:50 +08:00
Tianyi Jing
ec488a4c43 fix: type for ProviderConfig.default (#23282)
Signed-off-by: jingfelix <jingfelix@outlook.com>
2025-08-01 17:37:22 +08:00
zxhlyh
f78b903a49 Chore/variable label (#23270) 2025-08-01 15:43:36 +08:00
Yongtao Huang
fd086b06a6 CI: restrict autofix.ci to run only in official repo (#23267) 2025-08-01 15:21:31 +08:00
NeatGuyCoding
759ded3e3a minor fix: fix default for status of TidbAuthBinding in compatible with various versions (#22288) 2025-08-01 14:51:16 +08:00
Leo Zhang
05b002a8b7 Add a practical AKS one-click deployment Helm (#23253) 2025-08-01 14:22:59 +08:00
HyaCinth
f7016fd922 chore: Optimize component styles and interactions (#23250) (#23251) 2025-08-01 14:18:38 +08:00
wanttobeamaster
da5c003f97 chore: tablestore full text search support score normalization (#23255)
Co-authored-by: xiaozhiqing.xzq <xiaozhiqing.xzq@alibaba-inc.com>
2025-08-01 14:14:11 +08:00
Alan Bustamante
c33741a5e9 fix: improve boolean field handling in plugin configuration forms (#23160)
Co-authored-by: crazywoola <427733928@qq.com>
2025-08-01 10:34:46 +08:00
baonudesifeizhai
872ff3f1d4 fix: resolve multipart/form-data boundary issue in HTTP Request component #22880 (#23008)
Co-authored-by: crazywoola <427733928@qq.com>
2025-08-01 10:26:50 +08:00
Alex Chim
8ab3fda5a8 [Enhancement] Allow modify conversation variable via api (#23112)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-08-01 09:34:56 +08:00
kenwoodjw
1821726d4f fix celery backend when use rabbitmq (#23238)
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
2025-08-01 09:26:05 +08:00
GuanMu
98aea05ad2 Fix: Update the background color and hover effect of secondary buttons (#23186) 2025-07-31 21:19:34 +08:00
Asuka Minato
79ea94483e refine some orm types (#22885) 2025-07-31 18:43:04 +08:00
ian woolf
a0a30bfdcc fix: set default value to avoid initializing with empty value (#23220) 2025-07-31 17:50:53 +08:00
HyaCinth
caa5928ac4 chore: Optimize dark mode styles (#23222) (#23231) 2025-07-31 17:43:08 +08:00
Maries
9400832b2b fix: correct plugin indentation (#23228) 2025-07-31 17:35:39 +08:00
lyzno1
a82b55005b fix: resolve sidebar animation glitches and layout shifts in app detail page (#23216) (#23221) 2025-07-31 16:04:49 +08:00
Yongtao Huang
a434f6240f Chroe: some misc cleanup (#23203)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-31 15:33:39 +08:00
NFish
f5e177db89 fix: call checkOrSetAccessToken when app access mode is PUBLIC (#23195)
Co-authored-by: crazywoola <427733928@qq.com>
2025-07-31 14:18:54 +08:00
GuanMu
5febd66808 Fix: Fix style issues (#23209) 2025-07-31 11:47:34 +08:00
Jason Young
afac1fe590 Add comprehensive security tests for file upload controller (#23102)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-31 10:32:16 +08:00
kenwoodjw
4251515b4e fix remote file (#23127)
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
2025-07-31 10:30:54 +08:00
lyzno1
1b2046da3f fix: prevent client-side crashes from null/undefined plugin data in workflow (#23154) (#23182) 2025-07-31 10:03:33 +08:00
znn
646900b00c fixing embedded chat styling (#23198) 2025-07-31 10:03:03 +08:00
Ali Saleh
142ab74784 feat: Enable Tracing Support For Phoenix Cloud Instance (#23196) 2025-07-30 15:58:26 -07:00
Aurelius Huang
ffddabde43 feat(notion): Notion Database extracts Rows content in row order and appends Row Page URL (#22646)
Co-authored-by: Aurelius Huang <cm.huang@aftership.com>
2025-07-30 21:35:20 +08:00
Wu Tianwei
8c6d87f08a chore: Update vulnerable eslint dependencies (#23192) 2025-07-30 21:31:23 +08:00
lyzno1
270dd955d0 chore(i18n): sync missing keys in zh-Hans and ja-JP (#23175) 2025-07-30 18:00:41 +08:00
Will
4e2129d74f fix: Error processing trace tasks (#23170) 2025-07-30 18:00:15 +08:00
NeatGuyCoding
07cff1ed2c minor fix: fix flask api resources only accept one resource for same url (#23168) 2025-07-30 17:05:02 +08:00
NeatGuyCoding
070379a900 minor fix: fix wrong check of annotation_ids (#23164) 2025-07-30 17:04:31 +08:00
Sn0rt
bbdeb15501 fix: Support URL-encoded passwords with special characters in CELERY_BROKER_URL (#23163)
Signed-off-by: Sn0rt <wangguohao.2009@gmail.com>
2025-07-30 16:39:54 +08:00
kenwoodjw
28478cdc41 feat: support metadata condition filter string array (#23111)
Signed-off-by: kenwoodjw <blackxin55+@gmail.com>
2025-07-30 16:13:45 +08:00
呆萌闷油瓶
11ec62ca70 fix: element of Array[string] and Array[number] and size attribution (#23074)
Co-authored-by: crazywoola <427733928@qq.com>
2025-07-30 15:21:50 +08:00
GuanMu
4499cda186 Feat annotations panel (#22968) 2025-07-30 13:40:48 +08:00
Joel
c05c5953a8 fix: disabled auto update but still show in plugin detail (#23150) 2025-07-30 11:15:06 +08:00
rhochman
eee576355b Fix: Support for Elasticsearch Cloud Connector (#23017)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-30 11:12:16 +08:00
lyzno1
a3ef869db6 fix(i18n): clean up unused keys and fix nesting & placeholder issues (#23123) 2025-07-30 10:37:44 +08:00
Yongtao Huang
a51998e4aa Fix: prevent KeyError in validate_api_list by correcting logical check (#23126) 2025-07-30 10:37:06 +08:00
znn
0b44edaca9 request fail when no api key (#23135) 2025-07-30 10:36:03 +08:00
Yongtao Huang
ab163a5f75 Chore: use Workflow.VERSION_DRAFT instead of hardcoded draft (#23136)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-30 10:34:51 +08:00
Yongtao Huang
f17ca26b10 Fix: add missing db.session.close() to ensure proper session cleanup (#23122) 2025-07-30 10:34:24 +08:00
lyzno1
0ea010d7ee fix: metadata API nullable validation consistency issue (#23133)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-30 10:33:24 +08:00
baonudesifeizhai
72a2c3decf Fix/http node timeout validation#23077 (#23117)
Co-authored-by: crazywoola <427733928@qq.com>
2025-07-29 21:40:15 +08:00
呆萌闷油瓶
ab7c2cf000 minor fix: Object of type int64 is not JSON serializable (#23109) 2025-07-29 21:40:03 +08:00
lyzno1
6914c1c85e fix(web): make iteration panel respect MAX_PARALLEL_LIMIT environment variable (#23083) (#23104) 2025-07-29 21:39:40 +08:00
crazywoola
ea542d42ca fix: i18n link in README.md (#23121) 2025-07-29 21:36:32 +08:00
NeatGuyCoding
cba5bd588c minor fix: wrong position of retry_document_indexing_task time elapsed (#23099) 2025-07-29 20:54:37 +08:00
Shaun
00cb1c26a1 refactor: pass external_trace_id to message trace (#23089)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-29 19:34:46 +08:00
lyzno1
f4d4a32af2 Feat/enhance i18n scripts (#23114) 2025-07-29 18:24:57 +08:00
NeatGuyCoding
1bf0df03b5 minor fix: fix some translation (#23105) 2025-07-29 16:36:29 +08:00
NeatGuyCoding
ae28ca0b8d minor fix: wrong assignment (#23103) 2025-07-29 16:36:21 +08:00
NeatGuyCoding
51a6b9dc57 hotfix: clear_all_annotations should also execute delete_annotation_index_task just like delete_app_annotation (#23093)
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-29 16:35:33 +08:00
zxhlyh
4c65a8091a chore: base form (#23101) 2025-07-29 15:37:16 +08:00
crazywoola
27f400e13f feat: update banner (#23095) 2025-07-29 14:05:59 +08:00
GuanMu
7721648867 Fix variable config (#23070) 2025-07-29 11:24:59 +08:00
NeatGuyCoding
47cc951841 Fix Empty Collection WHERE Filter Issue (#23086) 2025-07-29 11:17:50 +08:00
NeatGuyCoding
63b6026e6e minor fix: fix error messages (#23081) 2025-07-29 10:59:43 +08:00
KVOJJJin
84aa38586f Fix: number input can display 0 (#23084) 2025-07-29 10:59:12 +08:00
znn
a70d59d4a6 ability to click classifier during workflow execution (#23079) 2025-07-29 09:45:49 +08:00
Yongtao Huang
57e0a12ccd Refactor: remove redundant full module paths in exception handlers (#23076)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-29 09:40:51 +08:00
lyzno1
f5e1fa4bd2 fix(scripts): resolve i18n check script path and logic issues (#23069) 2025-07-29 09:39:10 +08:00
znn
a7ce1e5789 dark mode for overlay (#23078) 2025-07-29 09:37:40 +08:00
crazywoola
5f550126b3 Fix/23066 i18n related commands are broken (#23071)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-07-28 21:25:18 +08:00
lyzno1
572a2bbe53 fix(i18n): Complete missing translations and clean up legacy keys in app-debug across all locales (#23062) (#23065) 2025-07-28 19:46:27 +08:00
NeatGuyCoding
537c04745d minor fix: using the same AccountInFreezeError (#23061)
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2025-07-28 18:24:27 +08:00
Yip Chung Lam
7f004e2f41 fix: web SSO login redirect to correct basePath and origin (#23058) 2025-07-28 18:17:01 +08:00
GuanMu
7c6415551d Fixed code formatting issues in the comment header option component (#23060) 2025-07-28 16:44:50 +08:00
733 changed files with 36888 additions and 4601 deletions

.env.example (new file, 1197 lines): diff suppressed because it is too large.

.github/ISSUE_TEMPLATE/chore.yaml (new file, 44 lines)

@@ -0,0 +1,44 @@
name: "✨ Refactor"
description: Refactor existing code for improved readability and maintainability.
title: "[Chore/Refactor] "
labels:
  - refactor
body:
  - type: checkboxes
    attributes:
      label: Self Checks
      description: "To make sure we get to you in time, please check the following :)"
      options:
        - label: I have read the [Contributing Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) and [Language Policy](https://github.com/langgenius/dify/issues/1542).
          required: true
        - label: This is only for refactoring, if you would like to ask a question, please head to [Discussions](https://github.com/langgenius/dify/discussions/categories/general).
          required: true
        - label: I have searched for existing issues [search for existing issues](https://github.com/langgenius/dify/issues), including closed ones.
          required: true
        - label: I confirm that I am using English to submit this report, otherwise it will be closed.
          required: true
        - label: 【中文用户 & Non English User】请使用英语提交否则会被关闭
          required: true
        - label: "Please do not modify this template :) and fill in all the required fields."
          required: true
  - type: textarea
    id: description
    attributes:
      label: Description
      placeholder: "Describe the refactor you are proposing."
    validations:
      required: true
  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      placeholder: "Explain why this refactor is necessary."
    validations:
      required: false
  - type: textarea
    id: additional-context
    attributes:
      label: Additional Context
      placeholder: "Add any other context or screenshots about the request here."
    validations:
      required: false

@@ -99,3 +99,6 @@ jobs:
- name: Run Tool
run: uv run --project api bash dev/pytest/pytest_tools.sh
- name: Run TestContainers
run: uv run --project api bash dev/pytest/pytest_testcontainers.sh
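
This new CI step runs the testcontainers-based suites added throughout this range (message service, app service, account service, and others). As an illustration only, here is a minimal test in the style the testcontainers Python package encourages; the image tag, table, and assertions are assumptions, not code from the actual suite:

```python
import sqlalchemy as sa
from testcontainers.postgres import PostgresContainer


def test_database_roundtrip():
    # Spin up a disposable PostgreSQL instance that lives only for this test.
    with PostgresContainer("postgres:15-alpine") as postgres:
        engine = sa.create_engine(postgres.get_connection_url())
        with engine.begin() as conn:
            conn.execute(sa.text("CREATE TABLE accounts (id TEXT)"))
            conn.execute(sa.text("INSERT INTO accounts (id) VALUES (:id)"), {"id": "a1"})
            count = conn.execute(sa.text("SELECT count(*) FROM accounts")).scalar()
        assert count == 1
```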

@@ -9,6 +9,7 @@ permissions:
jobs:
autofix:
if: github.repository == 'langgenius/dify'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

@@ -7,6 +7,7 @@ on:
- "deploy/dev"
- "deploy/enterprise"
- "build/**"
- "release/e-*"
tags:
- "*"

@@ -1,13 +1,18 @@
name: Check i18n Files and Create PR
on:
pull_request:
types: [closed]
push:
branches: [main]
paths:
- 'web/i18n/en-US/*.ts'
permissions:
contents: write
pull-requests: write
jobs:
check-and-update:
if: github.event.pull_request.merged == true
if: github.repository == 'langgenius/dify'
runs-on: ubuntu-latest
defaults:
run:
@@ -15,8 +20,8 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 2 # last 2 commits
persist-credentials: false
fetch-depth: 2
token: ${{ secrets.GITHUB_TOKEN }}
- name: Check for file changes in i18n/en-US
id: check_files
@@ -27,6 +32,13 @@ jobs:
echo "Changed files: $changed_files"
if [ -n "$changed_files" ]; then
echo "FILES_CHANGED=true" >> $GITHUB_ENV
file_args=""
for file in $changed_files; do
filename=$(basename "$file" .ts)
file_args="$file_args --file=$filename"
done
echo "FILE_ARGS=$file_args" >> $GITHUB_ENV
echo "File arguments: $file_args"
else
echo "FILES_CHANGED=false" >> $GITHUB_ENV
fi
@@ -49,14 +61,15 @@ jobs:
if: env.FILES_CHANGED == 'true'
run: pnpm install --frozen-lockfile
- name: Run npm script
- name: Generate i18n translations
if: env.FILES_CHANGED == 'true'
run: pnpm run auto-gen-i18n
run: pnpm run auto-gen-i18n ${{ env.FILE_ARGS }}
- name: Create Pull Request
if: env.FILES_CHANGED == 'true'
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: Update i18n files based on en-US changes
title: 'chore: translate i18n files'
body: This PR was automatically created to update i18n files based on changes in en-US locale.
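
The key change in this workflow is that `auto-gen-i18n` now receives one `--file=<name>` argument per changed en-US locale file instead of regenerating every locale wholesale. A small Python sketch of the same mapping the shell loop performs (the sample paths are hypothetical):

```python
from pathlib import Path


def build_file_args(changed_files: list[str]) -> str:
    """Mirror the workflow's shell loop: one --file=<basename> per changed .ts file."""
    args = []
    for file in changed_files:
        # basename "$file" .ts -> strip directories and the .ts suffix
        name = Path(file).name.removesuffix(".ts")
        args.append(f"--file={name}")
    return " ".join(args)


# Hypothetical changed files, e.g. from a git diff of the last two commits
print(build_file_args(["web/i18n/en-US/app-debug.ts", "web/i18n/en-US/common.ts"]))
# --file=app-debug --file=common
```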

.gitignore (1 line changed)

@@ -215,3 +215,4 @@ mise.toml
# AI Assistant
.roo/
api/.env.backup
/clickzetta

@@ -235,13 +235,17 @@ Quickly deploy Dify to Alibaba cloud with [Alibaba Cloud Computing Nest](https:/
One-Click deploy Dify to Alibaba Cloud with [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Deploy to AKS with Azure Devops Pipeline
One-Click deploy Dify to AKS with [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Contributing
For those who'd like to contribute code, see our [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.
> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help translate Dify into languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
## Community & contact

@@ -217,13 +217,17 @@ docker compose up -d
انشر Dify على علي بابا كلاود بنقرة واحدة باستخدام [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### استخدام Azure Devops Pipeline للنشر على AKS
انشر Dify على AKS بنقرة واحدة باستخدام [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## المساهمة
لأولئك الذين يرغبون في المساهمة، انظر إلى [دليل المساهمة](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) لدينا.
في الوقت نفسه، يرجى النظر في دعم Dify عن طريق مشاركته على وسائل التواصل الاجتماعي وفي الفعاليات والمؤتمرات.
> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).
> نحن نبحث عن مساهمين لمساعدة في ترجمة Dify إلى لغات أخرى غير اللغة الصينية المندرين أو الإنجليزية. إذا كنت مهتمًا بالمساعدة، يرجى الاطلاع على [README للترجمة](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) لمزيد من المعلومات، واترك لنا تعليقًا في قناة `global-users` على [خادم المجتمع على Discord](https://discord.gg/8Tpq4AcN9c).
**المساهمون**

@@ -235,13 +235,17 @@ GitHub-এ ডিফাইকে স্টার দিয়ে রাখুন
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### AKS-এ ডিপ্লয় করার জন্য Azure Devops Pipeline ব্যবহার
[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) ব্যবহার করে Dify কে AKS-এ এক ক্লিকে ডিপ্লয় করুন
## Contributing
যারা কোড অবদান রাখতে চান, তাদের জন্য আমাদের [অবদান নির্দেশিকা] দেখুন (https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)।
একই সাথে, সোশ্যাল মিডিয়া এবং ইভেন্ট এবং কনফারেন্সে এটি শেয়ার করে Dify কে সমর্থন করুন।
> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।
> আমরা ম্যান্ডারিন বা ইংরেজি ছাড়া অন্য ভাষায় Dify অনুবাদ করতে সাহায্য করার জন্য অবদানকারীদের খুঁজছি। আপনি যদি সাহায্য করতে আগ্রহী হন, তাহলে আরও তথ্যের জন্য [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) দেখুন এবং আমাদের [ডিসকর্ড কমিউনিটি সার্ভার](https://discord.gg/8Tpq4AcN9c) এর `গ্লোবাল-ইউজারস` চ্যানেলে আমাদের একটি মন্তব্য করুন।
## কমিউনিটি এবং যোগাযোগ

@@ -233,6 +233,9 @@ docker compose up -d
使用 [阿里云数据管理DMS](https://help.aliyun.com/zh/dms/dify-in-invitational-preview) 将 Dify 一键部署到 阿里云
#### 使用 Azure Devops Pipeline 部署到AKS
使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 将 Dify 一键部署到 AKS
## Star History
@@ -244,7 +247,7 @@ docker compose up -d
对于那些想要贡献代码的人,请参阅我们的[贡献指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同时,请考虑通过社交媒体、活动和会议来支持 Dify 的分享。
> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。
> 我们正在寻找贡献者来帮助将 Dify 翻译成除了中文和英文之外的其他语言。如果您有兴趣帮助,请参阅我们的[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)获取更多信息,并在我们的[Discord 社区服务器](https://discord.gg/8Tpq4AcN9c)的`global-users`频道中留言。
**Contributors**

@@ -230,13 +230,17 @@ Bereitstellung von Dify auf AWS mit [CDK](https://aws.amazon.com/cdk/)
Ein-Klick-Bereitstellung von Dify in der Alibaba Cloud mit [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Verwendung von Azure Devops Pipeline für AKS-Bereitstellung
Stellen Sie Dify mit einem Klick in AKS bereit, indem Sie [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) verwenden
## Contributing
Falls Sie Code beitragen möchten, lesen Sie bitte unseren [Contribution Guide](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md). Gleichzeitig bitten wir Sie, Dify zu unterstützen, indem Sie es in den sozialen Medien teilen und auf Veranstaltungen und Konferenzen präsentieren.
> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).
> Wir suchen Mitwirkende, die dabei helfen, Dify in weitere Sprachen zu übersetzen außer Mandarin oder Englisch. Wenn Sie Interesse an einer Mitarbeit haben, lesen Sie bitte die [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) für weitere Informationen und hinterlassen Sie einen Kommentar im `global-users`-Kanal unseres [Discord Community Servers](https://discord.gg/8Tpq4AcN9c).
## Gemeinschaft & Kontakt

@@ -230,6 +230,10 @@ Despliegue Dify en AWS usando [CDK](https://aws.amazon.com/cdk/)
Despliega Dify en Alibaba Cloud con un solo clic con [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Uso de Azure Devops Pipeline para implementar en AKS
Implementa Dify en AKS con un clic usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Contribuir
@@ -237,7 +241,7 @@ Para aquellos que deseen contribuir con código, consulten nuestra [Guía de con
Al mismo tiempo, considera apoyar a Dify compartiéndolo en redes sociales y en eventos y conferencias.
> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando colaboradores para ayudar con la traducción de Dify a idiomas que no sean el mandarín o el inglés. Si estás interesado en ayudar, consulta el [README de i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para obtener más información y déjanos un comentario en el canal `global-users` de nuestro [Servidor de Comunidad en Discord](https://discord.gg/8Tpq4AcN9c).
**Contribuidores**

@@ -228,6 +228,10 @@ Déployez Dify sur AWS en utilisant [CDK](https://aws.amazon.com/cdk/)
Déployez Dify en un clic sur Alibaba Cloud avec [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Utilisation d'Azure Devops Pipeline pour déployer sur AKS
Déployez Dify sur AKS en un clic en utilisant [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Contribuer
@@ -235,7 +239,7 @@ Pour ceux qui souhaitent contribuer du code, consultez notre [Guide de contribut
Dans le même temps, veuillez envisager de soutenir Dify en le partageant sur les réseaux sociaux et lors d'événements et de conférences.
> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).
> Nous recherchons des contributeurs pour aider à traduire Dify dans des langues autres que le mandarin ou l'anglais. Si vous êtes intéressé à aider, veuillez consulter le [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) pour plus d'informations, et laissez-nous un commentaire dans le canal `global-users` de notre [Serveur communautaire Discord](https://discord.gg/8Tpq4AcN9c).
**Contributeurs**

@@ -227,6 +227,10 @@ docker compose up -d
#### Alibaba Cloud Data Management
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) を利用して、DifyをAlibaba Cloudへワンクリックでデプロイできます
#### AKSへのデプロイにAzure Devops Pipelineを使用
[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)を使用してDifyをAKSにワンクリックでデプロイ
## 貢献
@@ -234,7 +238,7 @@ docker compose up -d
同時に、DifyをSNSやイベント、カンファレンスで共有してサポートしていただけると幸いです。
> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。
> Difyを英語または中国語以外の言語に翻訳してくれる貢献者を募集しています。興味がある場合は、詳細については[i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)を参照してください。また、[Discordコミュニティサーバー](https://discord.gg/8Tpq4AcN9c)の`global-users`チャンネルにコメントを残してください。
**貢献者**

@@ -228,6 +228,10 @@ wa'logh nIqHom neH ghun deployment toy'wI' [CDK](https://aws.amazon.com/cdk/) lo
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### AKS 'e' Deploy je Azure Devops Pipeline lo'laH
[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) lo'laH Dify AKS 'e' wa'DIch click 'e' Deploy
## Contributing
@@ -235,7 +239,7 @@ For those who'd like to contribute code, see our [Contribution Guide](https://gi
At the same time, please consider supporting Dify by sharing it on social media and at events and conferences.
> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
> We are looking for contributors to help with translating Dify to languages other than Mandarin or English. If you are interested in helping, please see the [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) for more information, and leave us a comment in the `global-users` channel of our [Discord Community Server](https://discord.gg/8Tpq4AcN9c).
**Contributors**

@@ -222,6 +222,10 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)를 통해 원클릭으로 Dify를 Alibaba Cloud에 배포할 수 있습니다
#### AKS에 배포하기 위해 Azure Devops Pipeline 사용
[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)을 사용하여 Dify를 AKS에 원클릭으로 배포
## 기여
@@ -229,7 +233,7 @@ Dify를 Kubernetes에 배포하고 프리미엄 스케일링 설정을 구성했
동시에 Dify를 소셜 미디어와 행사 및 컨퍼런스에 공유하여 지원하는 것을 고려해 주시기 바랍니다.
> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.
> 우리는 Dify를 중국어나 영어 이외의 언어로 번역하는 데 도움을 줄 수 있는 기여자를 찾고 있습니다. 도움을 주고 싶으시다면 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md)에서 더 많은 정보를 확인하시고 [Discord 커뮤니티 서버](https://discord.gg/8Tpq4AcN9c)의 `global-users` 채널에 댓글을 남겨주세요.
**기여자**

@@ -227,13 +227,17 @@ Implante o Dify na AWS usando [CDK](https://aws.amazon.com/cdk/)
Implante o Dify na Alibaba Cloud com um clique usando o [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Usando Azure Devops Pipeline para Implantar no AKS
Implante o Dify no AKS com um clique usando [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Contribuindo
Para aqueles que desejam contribuir com código, veja nosso [Guia de Contribuição](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md).
Ao mesmo tempo, considere apoiar o Dify compartilhando-o nas redes sociais e em eventos e conferências.
> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
> Estamos buscando contribuidores para ajudar na tradução do Dify para idiomas além de Mandarim e Inglês. Se você tiver interesse em ajudar, consulte o [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) para mais informações e deixe-nos um comentário no canal `global-users` em nosso [Servidor da Comunidade no Discord](https://discord.gg/8Tpq4AcN9c).
**Contribuidores**

@@ -228,6 +228,10 @@ Uvedite Dify v AWS z uporabo [CDK](https://aws.amazon.com/cdk/)
Z enim klikom namestite Dify na Alibaba Cloud z [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Uporaba Azure Devops Pipeline za uvajanje v AKS
Z enim klikom namestite Dify v AKS z uporabo [Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Prispevam

@@ -221,13 +221,17 @@ Dify'ı bulut platformuna tek tıklamayla dağıtın [terraform](https://www.ter
[Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/) kullanarak Dify'ı tek tıkla Alibaba Cloud'a dağıtın
#### AKS'ye Dağıtım için Azure Devops Pipeline Kullanımı
[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) kullanarak Dify'ı tek tıkla AKS'ye dağıtın
## Katkıda Bulunma
Kod katkısında bulunmak isteyenler için [Katkı Kılavuzumuza](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md) bakabilirsiniz.
Aynı zamanda, lütfen Dify'ı sosyal medyada, etkinliklerde ve konferanslarda paylaşarak desteklemeyi düşünün.
> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.
> Dify'ı Mandarin veya İngilizce dışındaki dillere çevirmemize yardımcı olacak katkıda bulunanlara ihtiyacımız var. Yardımcı olmakla ilgileniyorsanız, lütfen daha fazla bilgi için [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) dosyasına bakın ve [Discord Topluluk Sunucumuzdaki](https://discord.gg/8Tpq4AcN9c) `global-users` kanalında bize bir yorum bırakın.
**Katkıda Bulunanlar**

@@ -233,13 +233,17 @@ Dify 的所有功能都提供相應的 API因此您可以輕鬆地將 Dify
透過 [阿里雲數據管理DMS](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/),一鍵將 Dify 部署至阿里雲
#### 使用 Azure Devops Pipeline 部署到AKS
使用[Azure Devops Pipeline Helm Chart by @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS) 將 Dify 一鍵部署到 AKS
## 貢獻
對於想要貢獻程式碼的開發者,請參閱我們的[貢獻指南](https://github.com/langgenius/dify/blob/main/CONTRIBUTING.md)。
同時,也請考慮透過在社群媒體和各種活動與會議上分享 Dify 來支持我們。
> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。
> 我們正在尋找貢獻者協助將 Dify 翻譯成中文和英文以外的語言。如果您有興趣幫忙,請查看 [i18n README](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) 獲取更多資訊,並在我們的 [Discord 社群伺服器](https://discord.gg/8Tpq4AcN9c) 的 `global-users` 頻道留言給我們。
## 社群與聯絡方式

@@ -224,6 +224,10 @@ Triển khai Dify trên AWS bằng [CDK](https://aws.amazon.com/cdk/)
Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng [Alibaba Cloud Data Management](https://www.alibabacloud.com/help/en/dms/dify-in-invitational-preview/)
#### Sử dụng Azure Devops Pipeline để Triển khai lên AKS
Triển khai Dify lên AKS chỉ với một cú nhấp chuột bằng [Azure Devops Pipeline Helm Chart bởi @LeoZhang](https://github.com/Ruiruiz30/Dify-helm-chart-AKS)
## Đóng góp
@@ -231,7 +235,7 @@ Triển khai Dify lên Alibaba Cloud chỉ với một cú nhấp chuột bằng
Đồng thời, vui lòng xem xét hỗ trợ Dify bằng cách chia sẻ nó trên mạng xã hội và tại các sự kiện và hội nghị.
> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.
> Chúng tôi đang tìm kiếm người đóng góp để giúp dịch Dify sang các ngôn ngữ khác ngoài tiếng Trung hoặc tiếng Anh. Nếu bạn quan tâm đến việc giúp đỡ, vui lòng xem [README i18n](https://github.com/langgenius/dify/blob/main/web/i18n-config/README.md) để biết thêm thông tin và để lại bình luận cho chúng tôi trong kênh `global-users` của [Máy chủ Cộng đồng Discord](https://discord.gg/8Tpq4AcN9c) của chúng tôi.
**Người đóng góp**

@@ -232,6 +232,7 @@ TABLESTORE_ENDPOINT=https://instance-name.cn-hangzhou.ots.aliyuncs.com
TABLESTORE_INSTANCE_NAME=instance-name
TABLESTORE_ACCESS_KEY_ID=xxx
TABLESTORE_ACCESS_KEY_SECRET=xxx
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE=false
# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com

@@ -19,7 +19,7 @@ RUN apt-get update \
# Install Python dependencies
COPY pyproject.toml uv.lock ./
RUN uv sync --locked
RUN uv sync --locked --no-dev
# production stage
FROM base AS production

@@ -5,10 +5,11 @@ import secrets
from typing import Any, Optional
import click
import sqlalchemy as sa
from flask import current_app
from pydantic import TypeAdapter
from sqlalchemy import select
from werkzeug.exceptions import NotFound
from sqlalchemy.exc import SQLAlchemyError
from configs import dify_config
from constants.languages import languages
@@ -180,8 +181,8 @@ def migrate_annotation_vector_database():
)
if not apps:
break
except NotFound:
break
except SQLAlchemyError:
raise
page += 1
for app in apps:
@@ -307,8 +308,8 @@ def migrate_knowledge_vector_database():
)
datasets = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise
page += 1
for dataset in datasets:
@@ -457,7 +458,7 @@ def convert_to_agent_apps():
"""
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query))
rs = conn.execute(sa.text(sql_query))
apps = []
for i in rs:
@@ -560,8 +561,8 @@ def old_metadata_migration():
.order_by(DatasetDocument.created_at.desc())
)
documents = db.paginate(select=stmt, page=page, per_page=50, max_per_page=50, error_out=False)
except NotFound:
break
except SQLAlchemyError:
raise
if not documents:
break
for document in documents:
@@ -702,7 +703,7 @@ def fix_app_site_missing():
sql = """select apps.id as id from apps left join sites on sites.app_id=apps.id
where sites.id is null limit 1000"""
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql))
rs = conn.execute(sa.text(sql))
processed_count = 0
for i in rs:
@@ -916,7 +917,7 @@ def clear_orphaned_file_records(force: bool):
)
orphaned_message_files = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
orphaned_message_files.append({"id": str(i[0]), "message_id": str(i[1])})
@@ -937,7 +938,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style("- Deleting orphaned message_files records", fg="white"))
query = "DELETE FROM message_files WHERE id IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
conn.execute(sa.text(query), {"ids": tuple([record["id"] for record in orphaned_message_files])})
click.echo(
click.style(f"Removed {len(orphaned_message_files)} orphaned message_files records.", fg="green")
)
@@ -954,7 +955,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Listing file records in table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['id_column']}, {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append({"table": files_table["table"], "id": str(i[0]), "key": i[1]})
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
@@ -974,7 +975,7 @@ def clear_orphaned_file_records(force: bool):
f"SELECT {ids_table['column']} FROM {ids_table['table']} WHERE {ids_table['column']} IS NOT NULL"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_ids_in_tables.append({"table": ids_table["table"], "id": str(i[0])})
elif ids_table["type"] == "text":
@@ -989,7 +990,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})
@@ -1008,7 +1009,7 @@ def clear_orphaned_file_records(force: bool):
f"FROM {ids_table['table']}"
)
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
for j in i[0]:
all_ids_in_tables.append({"table": ids_table["table"], "id": j})
@@ -1037,7 +1038,7 @@ def clear_orphaned_file_records(force: bool):
click.echo(click.style(f"- Deleting orphaned file records in table {files_table['table']}", fg="white"))
query = f"DELETE FROM {files_table['table']} WHERE {files_table['id_column']} IN :ids"
with db.engine.begin() as conn:
conn.execute(db.text(query), {"ids": tuple(orphaned_files)})
conn.execute(sa.text(query), {"ids": tuple(orphaned_files)})
except Exception as e:
click.echo(click.style(f"Error deleting orphaned file records: {str(e)}", fg="red"))
return
@@ -1107,7 +1108,7 @@ def remove_orphaned_files_on_storage(force: bool):
click.echo(click.style(f"- Listing files from table {files_table['table']}", fg="white"))
query = f"SELECT {files_table['key_column']} FROM {files_table['table']}"
with db.engine.begin() as conn:
rs = conn.execute(db.text(query))
rs = conn.execute(sa.text(query))
for i in rs:
all_files_in_tables.append(str(i[0]))
click.echo(click.style(f"Found {len(all_files_in_tables)} files in tables.", fg="white"))
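
Most of the hunks above apply one mechanical pattern from the "replace db with sa to get typing support (#23240)" commit: raw SQL strings are wrapped with `sqlalchemy.text` directly rather than the Flask-SQLAlchemy `db` proxy, whose dynamically resolved attributes type checkers treat as `Any`. A self-contained sketch of the resulting pattern (the table and queries are placeholders, not code from commands.py):

```python
import sqlalchemy as sa

# sa.text() is a plain module-level callable with complete type stubs,
# unlike db.text, which is resolved at runtime through the extension proxy.
engine = sa.create_engine("sqlite:///:memory:")  # stand-in for db.engine in this sketch
with engine.begin() as conn:
    conn.execute(sa.text("CREATE TABLE apps (id TEXT)"))
    conn.execute(sa.text("INSERT INTO apps (id) VALUES (:id)"), {"id": "demo"})
    rows = conn.execute(sa.text("SELECT id FROM apps")).fetchall()
print(rows)  # [('demo',)]
```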

@@ -330,17 +330,17 @@ class HttpConfig(BaseSettings):
def WEB_API_CORS_ALLOW_ORIGINS(self) -> list[str]:
return self.inner_WEB_API_CORS_ALLOW_ORIGINS.split(",")
HTTP_REQUEST_MAX_CONNECT_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum connection timeout in seconds for HTTP requests")
] = 10
HTTP_REQUEST_MAX_CONNECT_TIMEOUT: int = Field(
ge=1, description="Maximum connection timeout in seconds for HTTP requests", default=10
)
HTTP_REQUEST_MAX_READ_TIMEOUT: Annotated[
PositiveInt, Field(ge=60, description="Maximum read timeout in seconds for HTTP requests")
] = 60
HTTP_REQUEST_MAX_READ_TIMEOUT: int = Field(
ge=1, description="Maximum read timeout in seconds for HTTP requests", default=60
)
HTTP_REQUEST_MAX_WRITE_TIMEOUT: Annotated[
PositiveInt, Field(ge=10, description="Maximum write timeout in seconds for HTTP requests")
] = 20
HTTP_REQUEST_MAX_WRITE_TIMEOUT: int = Field(
ge=1, description="Maximum write timeout in seconds for HTTP requests", default=20
)
HTTP_REQUEST_NODE_MAX_BINARY_SIZE: PositiveInt = Field(
description="Maximum allowed size in bytes for binary data in HTTP requests",
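
The practical effect of this hunk is that each timeout's lower bound drops from its old default value (`ge=10`, `ge=60`, `ge=10`) to `ge=1`, so deployments can now configure timeouts below the shipped defaults while non-positive values are still rejected. A minimal pydantic-settings sketch of the new style, reduced to a single field (values assumed):

```python
from pydantic import Field, ValidationError
from pydantic_settings import BaseSettings


class HttpConfig(BaseSettings):
    # New style: plain int with an explicit default; only values below 1 fail validation.
    HTTP_REQUEST_MAX_CONNECT_TIMEOUT: int = Field(
        ge=1, description="Maximum connection timeout in seconds for HTTP requests", default=10
    )


print(HttpConfig().HTTP_REQUEST_MAX_CONNECT_TIMEOUT)  # 10
print(HttpConfig(HTTP_REQUEST_MAX_CONNECT_TIMEOUT=5).HTTP_REQUEST_MAX_CONNECT_TIMEOUT)  # 5; the old ge=10 bound rejected this
try:
    HttpConfig(HTTP_REQUEST_MAX_CONNECT_TIMEOUT=0)
except ValidationError:
    print("0 is rejected by ge=1")
```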

@@ -10,6 +10,7 @@ from .storage.aliyun_oss_storage_config import AliyunOSSStorageConfig
from .storage.amazon_s3_storage_config import S3StorageConfig
from .storage.azure_blob_storage_config import AzureBlobStorageConfig
from .storage.baidu_obs_storage_config import BaiduOBSStorageConfig
from .storage.clickzetta_volume_storage_config import ClickZettaVolumeStorageConfig
from .storage.google_cloud_storage_config import GoogleCloudStorageConfig
from .storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig
from .storage.oci_storage_config import OCIStorageConfig
@@ -20,6 +21,7 @@ from .storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig
from .vdb.analyticdb_config import AnalyticdbConfig
from .vdb.baidu_vector_config import BaiduVectorDBConfig
from .vdb.chroma_config import ChromaConfig
from .vdb.clickzetta_config import ClickzettaConfig
from .vdb.couchbase_config import CouchbaseConfig
from .vdb.elasticsearch_config import ElasticsearchConfig
from .vdb.huawei_cloud_config import HuaweiCloudConfig
@@ -52,6 +54,7 @@ class StorageConfig(BaseSettings):
"aliyun-oss",
"azure-blob",
"baidu-obs",
"clickzetta-volume",
"google-storage",
"huawei-obs",
"oci-storage",
@@ -61,8 +64,9 @@ class StorageConfig(BaseSettings):
"local",
] = Field(
description="Type of storage to use."
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', 'google-storage', "
"'huawei-obs', 'oci-storage', 'tencent-cos', 'volcengine-tos', 'supabase'. Default is 'opendal'.",
" Options: 'opendal', '(deprecated) local', 's3', 'aliyun-oss', 'azure-blob', 'baidu-obs', "
"'clickzetta-volume', 'google-storage', 'huawei-obs', 'oci-storage', 'tencent-cos', "
"'volcengine-tos', 'supabase'. Default is 'opendal'.",
default="opendal",
)
@@ -140,7 +144,8 @@ class DatabaseConfig(BaseSettings):
default="postgresql",
)
@computed_field
@computed_field # type: ignore[misc]
@property
def SQLALCHEMY_DATABASE_URI(self) -> str:
db_extras = (
f"{self.DB_EXTRAS}&client_encoding={self.DB_CHARSET}" if self.DB_CHARSET else self.DB_EXTRAS
@@ -215,7 +220,7 @@ class DatabaseConfig(BaseSettings):
class CeleryConfig(DatabaseConfig):
CELERY_BACKEND: str = Field(
description="Backend for Celery task results. Options: 'database', 'redis'.",
description="Backend for Celery task results. Options: 'database', 'redis', 'rabbitmq'.",
default="redis",
)
@@ -245,7 +250,12 @@ class CeleryConfig(DatabaseConfig):
@computed_field
def CELERY_RESULT_BACKEND(self) -> str | None:
return f"db+{self.SQLALCHEMY_DATABASE_URI}" if self.CELERY_BACKEND == "database" else self.CELERY_BROKER_URL
if self.CELERY_BACKEND in ("database", "rabbitmq"):
return f"db+{self.SQLALCHEMY_DATABASE_URI}"
elif self.CELERY_BACKEND == "redis":
return self.CELERY_BROKER_URL
else:
return None
@property
def BROKER_USE_SSL(self) -> bool:
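Note: with 'rabbitmq' added as a backend choice, the result backend can no longer be a one-line ternary: RabbitMQ brokers do not store task results, so those deployments fall back to the database. The branching above, restated as a plain function for clarity (a sketch, not the shipped code):

def celery_result_backend(backend: str, db_uri: str, broker_url: str) -> str | None:
    # 'database' and 'rabbitmq' persist results in the database;
    # 'redis' reuses the broker URL; anything else disables the result backend.
    if backend in ("database", "rabbitmq"):
        return f"db+{db_uri}"
    elif backend == "redis":
        return broker_url
    return None

print(celery_result_backend("rabbitmq", "postgresql://localhost/dify", "amqp://guest@localhost//"))
# db+postgresql://localhost/dify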
@@ -298,6 +308,7 @@ class MiddlewareConfig(
AliyunOSSStorageConfig,
AzureBlobStorageConfig,
BaiduOBSStorageConfig,
ClickZettaVolumeStorageConfig,
GoogleCloudStorageConfig,
HuaweiCloudOBSStorageConfig,
OCIStorageConfig,
@@ -310,6 +321,7 @@ class MiddlewareConfig(
VectorStoreConfig,
AnalyticdbConfig,
ChromaConfig,
ClickzettaConfig,
HuaweiCloudConfig,
MilvusConfig,
MyScaleConfig,

View File

@@ -0,0 +1,65 @@
"""ClickZetta Volume Storage Configuration"""
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings
class ClickZettaVolumeStorageConfig(BaseSettings):
"""Configuration for ClickZetta Volume storage."""
CLICKZETTA_VOLUME_USERNAME: Optional[str] = Field(
description="Username for ClickZetta Volume authentication",
default=None,
)
CLICKZETTA_VOLUME_PASSWORD: Optional[str] = Field(
description="Password for ClickZetta Volume authentication",
default=None,
)
CLICKZETTA_VOLUME_INSTANCE: Optional[str] = Field(
description="ClickZetta instance identifier",
default=None,
)
CLICKZETTA_VOLUME_SERVICE: str = Field(
description="ClickZetta service endpoint",
default="api.clickzetta.com",
)
CLICKZETTA_VOLUME_WORKSPACE: str = Field(
description="ClickZetta workspace name",
default="quick_start",
)
CLICKZETTA_VOLUME_VCLUSTER: str = Field(
description="ClickZetta virtual cluster name",
default="default_ap",
)
CLICKZETTA_VOLUME_SCHEMA: str = Field(
description="ClickZetta schema name",
default="dify",
)
CLICKZETTA_VOLUME_TYPE: str = Field(
description="ClickZetta volume type (table|user|external)",
default="user",
)
CLICKZETTA_VOLUME_NAME: Optional[str] = Field(
description="ClickZetta volume name for external volumes",
default=None,
)
CLICKZETTA_VOLUME_TABLE_PREFIX: str = Field(
description="Prefix for ClickZetta volume table names",
default="dataset_",
)
CLICKZETTA_VOLUME_DIFY_PREFIX: str = Field(
description="Directory prefix for User Volume to organize Dify files",
default="dify_km",
)
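Note: like the other storage configs, these fields load from environment variables via pydantic-settings, and the backend is selected by setting STORAGE_TYPE=clickzetta-volume per the StorageConfig literal above. A hedged sketch of the mechanism (trimmed stand-in class; the value is a placeholder, not a real credential):

import os
from pydantic import Field
from pydantic_settings import BaseSettings

class ClickZettaVolumeStorageConfig(BaseSettings):  # trimmed copy of the class above
    CLICKZETTA_VOLUME_USERNAME: str | None = Field(default=None)
    CLICKZETTA_VOLUME_TYPE: str = Field(default="user")

os.environ["CLICKZETTA_VOLUME_USERNAME"] = "demo-user"  # e.g. set in .env
print(ClickZettaVolumeStorageConfig().CLICKZETTA_VOLUME_USERNAME)  # demo-user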

View File

@@ -0,0 +1,69 @@
from typing import Optional
from pydantic import BaseModel, Field
class ClickzettaConfig(BaseModel):
"""
Clickzetta Lakehouse vector database configuration
"""
CLICKZETTA_USERNAME: Optional[str] = Field(
description="Username for authenticating with Clickzetta Lakehouse",
default=None,
)
CLICKZETTA_PASSWORD: Optional[str] = Field(
description="Password for authenticating with Clickzetta Lakehouse",
default=None,
)
CLICKZETTA_INSTANCE: Optional[str] = Field(
description="Clickzetta Lakehouse instance ID",
default=None,
)
CLICKZETTA_SERVICE: Optional[str] = Field(
description="Clickzetta API service endpoint (e.g., 'api.clickzetta.com')",
default="api.clickzetta.com",
)
CLICKZETTA_WORKSPACE: Optional[str] = Field(
description="Clickzetta workspace name",
default="default",
)
CLICKZETTA_VCLUSTER: Optional[str] = Field(
description="Clickzetta virtual cluster name",
default="default_ap",
)
CLICKZETTA_SCHEMA: Optional[str] = Field(
description="Database schema name in Clickzetta",
default="public",
)
CLICKZETTA_BATCH_SIZE: Optional[int] = Field(
description="Batch size for bulk insert operations",
default=100,
)
CLICKZETTA_ENABLE_INVERTED_INDEX: Optional[bool] = Field(
description="Enable inverted index for full-text search capabilities",
default=True,
)
CLICKZETTA_ANALYZER_TYPE: Optional[str] = Field(
description="Analyzer type for full-text search: keyword, english, chinese, unicode",
default="chinese",
)
CLICKZETTA_ANALYZER_MODE: Optional[str] = Field(
description="Analyzer mode for tokenization: max_word (fine-grained) or smart (intelligent)",
default="smart",
)
CLICKZETTA_VECTOR_DISTANCE_FUNCTION: Optional[str] = Field(
description="Distance function for vector similarity: l2_distance or cosine_distance",
default="cosine_distance",
)

View File

@@ -1,12 +1,13 @@
from typing import Optional
from pydantic import Field, PositiveInt
from pydantic import Field, PositiveInt, model_validator
from pydantic_settings import BaseSettings
class ElasticsearchConfig(BaseSettings):
"""
Configuration settings for Elasticsearch
Configuration settings for both self-managed and Elastic Cloud deployments.
Can load from environment variables or .env files.
"""
ELASTICSEARCH_HOST: Optional[str] = Field(
@@ -28,3 +29,50 @@ class ElasticsearchConfig(BaseSettings):
description="Password for authenticating with Elasticsearch (default is 'elastic')",
default="elastic",
)
# Elastic Cloud (optional)
ELASTICSEARCH_USE_CLOUD: Optional[bool] = Field(
description="Set to True to use Elastic Cloud instead of self-hosted Elasticsearch", default=False
)
ELASTICSEARCH_CLOUD_URL: Optional[str] = Field(
description="Full URL for Elastic Cloud deployment (e.g., 'https://example.es.region.aws.found.io:443')",
default=None,
)
ELASTICSEARCH_API_KEY: Optional[str] = Field(
description="API key for authenticating with Elastic Cloud", default=None
)
# Common options
ELASTICSEARCH_CA_CERTS: Optional[str] = Field(
description="Path to CA certificate file for SSL verification", default=None
)
ELASTICSEARCH_VERIFY_CERTS: bool = Field(
description="Whether to verify SSL certificates (default is False)", default=False
)
ELASTICSEARCH_REQUEST_TIMEOUT: int = Field(
description="Request timeout in milliseconds (default is 100000)", default=100000
)
ELASTICSEARCH_RETRY_ON_TIMEOUT: bool = Field(
description="Whether to retry requests on timeout (default is True)", default=True
)
ELASTICSEARCH_MAX_RETRIES: int = Field(
description="Maximum number of retry attempts (default is 10000)", default=10000
)
@model_validator(mode="after")
def validate_elasticsearch_config(self):
"""Validate Elasticsearch configuration based on deployment type."""
if self.ELASTICSEARCH_USE_CLOUD:
if not self.ELASTICSEARCH_CLOUD_URL:
raise ValueError("ELASTICSEARCH_CLOUD_URL is required when using Elastic Cloud")
if not self.ELASTICSEARCH_API_KEY:
raise ValueError("ELASTICSEARCH_API_KEY is required when using Elastic Cloud")
else:
if not self.ELASTICSEARCH_HOST:
raise ValueError("ELASTICSEARCH_HOST is required for self-hosted Elasticsearch")
if not self.ELASTICSEARCH_USERNAME:
raise ValueError("ELASTICSEARCH_USERNAME is required for self-hosted Elasticsearch")
if not self.ELASTICSEARCH_PASSWORD:
raise ValueError("ELASTICSEARCH_PASSWORD is required for self-hosted Elasticsearch")
return self
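Note: the model_validator enforces the two deployment shapes at construction time instead of failing later on connection. A minimal reproduction of the behavior with a trimmed stand-in class (assuming pydantic v2):

from typing import Optional
from pydantic import BaseModel, ValidationError, model_validator

class EsConfig(BaseModel):  # trimmed stand-in for ElasticsearchConfig
    ELASTICSEARCH_USE_CLOUD: bool = False
    ELASTICSEARCH_CLOUD_URL: Optional[str] = None
    ELASTICSEARCH_API_KEY: Optional[str] = None

    @model_validator(mode="after")
    def check(self):
        if self.ELASTICSEARCH_USE_CLOUD and not self.ELASTICSEARCH_CLOUD_URL:
            raise ValueError("ELASTICSEARCH_CLOUD_URL is required when using Elastic Cloud")
        return self

try:
    EsConfig(ELASTICSEARCH_USE_CLOUD=True)  # missing cloud URL -> fails fast
except ValidationError as e:
    print("rejected:", e.errors()[0]["msg"])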

View File

@@ -28,3 +28,8 @@ class TableStoreConfig(BaseSettings):
description="AccessKey secret for the instance name",
default=None,
)
TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE: bool = Field(
description="Whether to normalize full-text search scores to [0, 1]",
default=False,
)

View File

@@ -9,10 +9,10 @@ DEFAULT_FILE_NUMBER_LIMITS = 3
IMAGE_EXTENSIONS = ["jpg", "jpeg", "png", "webp", "gif", "svg"]
IMAGE_EXTENSIONS.extend([ext.upper() for ext in IMAGE_EXTENSIONS])
VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "mpga"]
VIDEO_EXTENSIONS = ["mp4", "mov", "mpeg", "webm"]
VIDEO_EXTENSIONS.extend([ext.upper() for ext in VIDEO_EXTENSIONS])
AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "webm", "amr"]
AUDIO_EXTENSIONS = ["mp3", "m4a", "wav", "amr", "mpga"]
AUDIO_EXTENSIONS.extend([ext.upper() for ext in AUDIO_EXTENSIONS])

View File

@@ -84,6 +84,7 @@ from .datasets import (
external,
hit_testing,
metadata,
upload_file,
website,
)

View File

@@ -100,7 +100,7 @@ class AnnotationReplyActionStatusApi(Resource):
return {"job_id": job_id, "job_status": job_status, "error_msg": error_msg}, 200
class AnnotationListApi(Resource):
class AnnotationApi(Resource):
@setup_required
@login_required
@account_initialization_required
@@ -123,6 +123,23 @@ class AnnotationListApi(Resource):
}
return response, 200
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation
@setup_required
@login_required
@account_initialization_required
@@ -131,8 +148,25 @@ class AnnotationListApi(Resource):
raise Forbidden()
app_id = str(app_id)
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204
# Use request.args.getlist to get annotation_ids array directly
annotation_ids = request.args.getlist("annotation_id")
# If annotation_ids are provided, handle batch deletion
if annotation_ids:
# Check if any annotation_ids contain empty strings or invalid values
if not all(annotation_id.strip() for annotation_id in annotation_ids if annotation_id):
return {
"code": "bad_request",
"message": "annotation_ids are required if the parameter is provided.",
}, 400
result = AppAnnotationService.delete_app_annotations_in_batch(app_id, annotation_ids)
return result, 204
# If no annotation_ids are provided, handle clearing all annotations
else:
AppAnnotationService.clear_all_annotations(app_id)
return {"result": "success"}, 204
class AnnotationExportApi(Resource):
@@ -149,25 +183,6 @@ class AnnotationExportApi(Resource):
return response, 200
class AnnotationCreateApi(Resource):
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_resource_check("annotation")
@marshal_with(annotation_fields)
def post(self, app_id):
if not current_user.is_editor:
raise Forbidden()
app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument("question", required=True, type=str, location="json")
parser.add_argument("answer", required=True, type=str, location="json")
args = parser.parse_args()
annotation = AppAnnotationService.insert_app_annotation_directly(args, app_id)
return annotation
class AnnotationUpdateDeleteApi(Resource):
@setup_required
@login_required
@@ -210,14 +225,15 @@ class AnnotationBatchImportApi(Resource):
raise Forbidden()
app_id = str(app_id)
# get file from request
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()
if len(request.files) > 1:
raise TooManyFilesError()
# get file from request
file = request.files["file"]
# check file type
if not file.filename or not file.filename.lower().endswith(".csv"):
raise ValueError("Invalid file type. Only CSV files are allowed")
@@ -276,7 +292,7 @@ api.add_resource(AnnotationReplyActionApi, "/apps/<uuid:app_id>/annotation-reply
api.add_resource(
AnnotationReplyActionStatusApi, "/apps/<uuid:app_id>/annotation-reply/<string:action>/status/<uuid:job_id>"
)
api.add_resource(AnnotationListApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationApi, "/apps/<uuid:app_id>/annotations")
api.add_resource(AnnotationExportApi, "/apps/<uuid:app_id>/annotations/export")
api.add_resource(AnnotationUpdateDeleteApi, "/apps/<uuid:app_id>/annotations/<uuid:annotation_id>")
api.add_resource(AnnotationBatchImportApi, "/apps/<uuid:app_id>/annotations/batch-import")
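Note: the merged AnnotationApi keeps DELETE /apps/<app_id>/annotations backwards compatible: with no query parameters it clears everything, while repeated annotation_id parameters trigger batch deletion via request.args.getlist. A hedged client sketch (base URL, app id, ids, and token are placeholders):

import requests

BASE = "https://cloud.dify.ai/console/api"     # placeholder console API base
headers = {"Authorization": "Bearer <token>"}  # placeholder credentials
app_id = "<app-uuid>"

# Batch-delete two annotations: repeat the annotation_id query parameter.
requests.delete(
    f"{BASE}/apps/{app_id}/annotations",
    params=[("annotation_id", "<id-1>"), ("annotation_id", "<id-2>")],
    headers=headers,
)

# With no annotation_id parameters, the same endpoint clears all annotations.
requests.delete(f"{BASE}/apps/{app_id}/annotations", headers=headers)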

View File

@@ -28,6 +28,12 @@ from services.feature_service import FeatureService
ALLOW_CREATE_APP_MODES = ["chat", "agent-chat", "advanced-chat", "workflow", "completion"]
def _validate_description_length(description):
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
class AppListApi(Resource):
@setup_required
@login_required
@@ -94,7 +100,7 @@ class AppListApi(Resource):
"""Create app"""
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("mode", type=str, choices=ALLOW_CREATE_APP_MODES, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
@@ -146,7 +152,7 @@ class AppApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")
@@ -189,7 +195,7 @@ class AppCopyApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, location="json")
parser.add_argument("description", type=str, location="json")
parser.add_argument("description", type=_validate_description_length, location="json")
parser.add_argument("icon_type", type=str, location="json")
parser.add_argument("icon", type=str, location="json")
parser.add_argument("icon_background", type=str, location="json")

View File

@@ -5,7 +5,6 @@ from flask_restful import Resource, fields, marshal_with, reqparse
from flask_restful.inputs import int_range
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
from controllers.console import api
from controllers.console.app.error import (
CompletionRequestError,
@@ -133,7 +132,7 @@ class MessageFeedbackApi(Resource):
rating=args.get("rating"),
content=None,
)
except services.errors.message.MessageNotExistsError:
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
return {"result": "success"}

View File

@@ -67,7 +67,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "message_count": i.message_count})
@@ -176,7 +176,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
@@ -234,7 +234,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append(
{"date": str(i.date), "token_count": i.token_count, "total_price": i.total_price, "currency": "USD"}
@@ -310,7 +310,7 @@ ORDER BY
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append(
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}
@@ -373,7 +373,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append(
{
@@ -435,7 +435,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "latency": round(i.latency * 1000, 4)})
@@ -495,7 +495,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "tps": round(i.tokens_per_second, 4)})

View File

@@ -2,6 +2,7 @@ from datetime import datetime
from decimal import Decimal
import pytz
import sqlalchemy as sa
from flask import jsonify
from flask_login import current_user
from flask_restful import Resource, reqparse
@@ -71,7 +72,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "runs": i.runs})
@@ -133,7 +134,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append({"date": str(i.date), "terminal_count": i.terminal_count})
@@ -195,7 +196,7 @@ WHERE
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append(
{
@@ -277,7 +278,7 @@ GROUP BY
response_data = []
with db.engine.begin() as conn:
rs = conn.execute(db.text(sql_query), arg_dict)
rs = conn.execute(sa.text(sql_query), arg_dict)
for i in rs:
response_data.append(
{"date": str(i.date), "interactions": float(i.interactions.quantize(Decimal("0.01")))}

View File

@@ -113,9 +113,3 @@ class MemberNotInTenantError(BaseHTTPException):
error_code = "member_not_in_tenant"
description = "The member is not in the workspace."
code = 400
class AccountInFreezeError(BaseHTTPException):
error_code = "account_in_freeze"
description = "This email is temporarily unavailable."
code = 400

View File

@@ -41,7 +41,7 @@ def _validate_name(name):
def _validate_description_length(description):
if len(description) > 400:
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
@@ -113,7 +113,7 @@ class DatasetListApi(Resource):
)
parser.add_argument(
"description",
type=str,
type=_validate_description_length,
nullable=True,
required=False,
default="",
@@ -683,6 +683,7 @@ class DatasetRetrievalSettingApi(Resource):
| VectorType.HUAWEI_CLOUD
| VectorType.TENCENT
| VectorType.MATRIXONE
| VectorType.CLICKZETTA
):
return {
"retrieval_method": [
@@ -731,6 +732,7 @@ class DatasetRetrievalSettingMockApi(Resource):
| VectorType.TENCENT
| VectorType.HUAWEI_CLOUD
| VectorType.MATRIXONE
| VectorType.CLICKZETTA
):
return {
"retrieval_method": [

View File

@@ -642,7 +642,7 @@ class DocumentIndexingStatusApi(DocumentResource):
return marshal(document_dict, document_status_fields)
class DocumentDetailApi(DocumentResource):
class DocumentApi(DocumentResource):
METADATA_CHOICES = {"all", "only", "without"}
@setup_required
@@ -730,6 +730,28 @@ class DocumentDetailApi(DocumentResource):
return response, 200
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
if dataset is None:
raise NotFound("Dataset not found.")
# check user's model setting
DatasetService.check_dataset_model_setting(dataset)
document = self.get_document(dataset_id, document_id)
try:
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return {"result": "success"}, 204
class DocumentProcessingApi(DocumentResource):
@setup_required
@@ -768,30 +790,6 @@ class DocumentProcessingApi(DocumentResource):
return {"result": "success"}, 200
class DocumentDeleteApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
@cloud_edition_billing_rate_limit_check("knowledge")
def delete(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
if dataset is None:
raise NotFound("Dataset not found.")
# check user's model setting
DatasetService.check_dataset_model_setting(dataset)
document = self.get_document(dataset_id, document_id)
try:
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return {"result": "success"}, 204
class DocumentMetadataApi(DocumentResource):
@setup_required
@login_required
@@ -1037,11 +1035,10 @@ api.add_resource(
api.add_resource(DocumentBatchIndexingEstimateApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-estimate")
api.add_resource(DocumentBatchIndexingStatusApi, "/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(
DocumentProcessingApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>"
)
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentMetadataApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata")
api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>/batch")
api.add_resource(DocumentPauseApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause")

View File

@@ -22,8 +22,8 @@ class DatasetMetadataCreateApi(Resource):
@marshal_with(dataset_metadata_fields)
def post(self, dataset_id):
parser = reqparse.RequestParser()
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
metadata_args = MetadataArgs(**args)
@@ -56,7 +56,7 @@ class DatasetMetadataApi(Resource):
@marshal_with(dataset_metadata_fields)
def patch(self, dataset_id, metadata_id):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
dataset_id_str = str(dataset_id)
@@ -127,7 +127,7 @@ class DocumentMetadataEditApi(Resource):
DatasetService.check_dataset_permission(dataset, current_user)
parser = reqparse.RequestParser()
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
args = parser.parse_args()
metadata_args = MetadataOperationData(**args)
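Note: required=True alone still accepts an explicit JSON null; nullable=False is what rejects it, which is why these arguments flip from nullable=True. A short sketch, using the same flask-restful mechanism as the validator example earlier:

from flask import Flask
from flask_restful import reqparse
from werkzeug.exceptions import HTTPException

app = Flask(__name__)
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=False, location="json")

with app.test_request_context(json={"name": None}):
    try:
        parser.parse_args()
    except HTTPException as e:
        print(e.code)  # 400: name must not be null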

View File

@@ -0,0 +1,62 @@
from flask_login import current_user
from flask_restful import Resource
from werkzeug.exceptions import NotFound
from controllers.console import api
from controllers.console.wraps import (
account_initialization_required,
setup_required,
)
from core.file import helpers as file_helpers
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import UploadFile
from services.dataset_service import DocumentService
class UploadFileApi(Resource):
@setup_required
@account_initialization_required
def get(self, dataset_id, document_id):
"""Get upload file."""
# check dataset
dataset_id = str(dataset_id)
dataset = (
db.session.query(Dataset)
.filter(Dataset.tenant_id == current_user.current_tenant_id, Dataset.id == dataset_id)
.first()
)
if not dataset:
raise NotFound("Dataset not found.")
# check document
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
if not document:
raise NotFound("Document not found.")
# check upload file
if document.data_source_type != "upload_file":
raise ValueError(f"Document data source type ({document.data_source_type}) is not upload_file.")
data_source_info = document.data_source_info_dict
if data_source_info and "upload_file_id" in data_source_info:
file_id = data_source_info["upload_file_id"]
upload_file = db.session.query(UploadFile).filter(UploadFile.id == file_id).first()
if not upload_file:
raise NotFound("UploadFile not found.")
else:
raise ValueError("Upload file id not found in document data source info.")
url = file_helpers.get_signed_file_url(upload_file_id=upload_file.id)
return {
"id": upload_file.id,
"name": upload_file.name,
"size": upload_file.size,
"extension": upload_file.extension,
"url": url,
"download_url": f"{url}&as_attachment=true",
"mime_type": upload_file.mime_type,
"created_by": upload_file.created_by,
"created_at": upload_file.created_at.timestamp(),
}, 200
api.add_resource(UploadFileApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/upload-file")

View File

@@ -127,7 +127,7 @@ class EducationActivateLimitError(BaseHTTPException):
code = 429
class CompilanceRateLimitError(BaseHTTPException):
error_code = "compilance_rate_limit"
class ComplianceRateLimitError(BaseHTTPException):
error_code = "compliance_rate_limit"
description = "Rate limit exceeded for downloading compliance report."
code = 429

View File

@@ -58,21 +58,38 @@ class InstalledAppsListApi(Resource):
# filter out apps that user doesn't have access to
if FeatureService.get_system_features().webapp_auth.enabled:
user_id = current_user.id
res = []
app_ids = [installed_app["app"].id for installed_app in installed_app_list]
webapp_settings = EnterpriseService.WebAppAuth.batch_get_app_access_mode_by_id(app_ids)
# Pre-filter out apps without setting or with sso_verified
filtered_installed_apps = []
app_id_to_app_code = {}
for installed_app in installed_app_list:
webapp_setting = webapp_settings.get(installed_app["app"].id)
if not webapp_setting:
app_id = installed_app["app"].id
webapp_setting = webapp_settings.get(app_id)
if not webapp_setting or webapp_setting.access_mode == "sso_verified":
continue
if webapp_setting.access_mode == "sso_verified":
continue
app_code = AppService.get_app_code_by_id(str(installed_app["app"].id))
if EnterpriseService.WebAppAuth.is_user_allowed_to_access_webapp(
user_id=user_id,
app_code=app_code,
):
app_code = AppService.get_app_code_by_id(str(app_id))
app_id_to_app_code[app_id] = app_code
filtered_installed_apps.append(installed_app)
app_codes = list(app_id_to_app_code.values())
# Batch permission check
permissions = EnterpriseService.WebAppAuth.batch_is_user_allowed_to_access_webapps(
user_id=user_id,
app_codes=app_codes,
)
# Keep only allowed apps
res = []
for installed_app in filtered_installed_apps:
app_id = installed_app["app"].id
app_code = app_id_to_app_code[app_id]
if permissions.get(app_code):
res.append(installed_app)
installed_app_list = res
logger.debug("installed_app_list: %s, user_id: %s", installed_app_list, user_id)
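Note: the rewrite replaces one EnterpriseService permission call per installed app with a single batch call, turning an N+1 remote lookup into two calls total: one for access modes, one for permissions. The shape of the pattern, as a sketch with stubbed services standing in for EnterpriseService.WebAppAuth:

# Stubbed batch services; the real ones live in EnterpriseService.WebAppAuth.
def batch_get_access_modes(app_ids):
    return {app_id: "public" for app_id in app_ids}

def batch_check_permissions(user_id, app_codes):
    return {code: True for code in app_codes}

apps = [{"id": "a1", "code": "code-a1"}, {"id": "b2", "code": "code-b2"}]
modes = batch_get_access_modes([a["id"] for a in apps])                        # 1 call
candidates = [a for a in apps if modes.get(a["id"]) not in (None, "sso_verified")]
allowed = batch_check_permissions("user-1", [a["code"] for a in candidates])  # 1 call
visible = [a for a in candidates if allowed.get(a["code"])]
print([a["id"] for a in visible])  # ['a1', 'b2']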

View File

@@ -5,7 +5,6 @@ from flask_restful import marshal_with, reqparse
from flask_restful.inputs import int_range
from werkzeug.exceptions import InternalServerError, NotFound
import services
from controllers.console.app.error import (
AppMoreLikeThisDisabledError,
CompletionRequestError,
@@ -29,7 +28,11 @@ from models.model import AppMode
from services.app_generate_service import AppGenerateService
from services.errors.app import MoreLikeThisDisabledError
from services.errors.conversation import ConversationNotExistsError
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
from services.errors.message import (
FirstMessageNotExistsError,
MessageNotExistsError,
SuggestedQuestionsAfterAnswerDisabledError,
)
from services.message_service import MessageService
@@ -52,9 +55,9 @@ class MessageListApi(InstalledAppResource):
return MessageService.pagination_by_first_id(
app_model, current_user, args["conversation_id"], args["first_id"], args["limit"]
)
except services.errors.conversation.ConversationNotExistsError:
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.message.FirstMessageNotExistsError:
except FirstMessageNotExistsError:
raise NotFound("First Message Not Exists.")
@@ -77,7 +80,7 @@ class MessageFeedbackApi(InstalledAppResource):
rating=args.get("rating"),
content=args.get("content"),
)
except services.errors.message.MessageNotExistsError:
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
return {"result": "success"}

View File

@@ -49,7 +49,6 @@ class FileApi(Resource):
@marshal_with(file_fields)
@cloud_edition_billing_resource_check("documents")
def post(self):
file = request.files["file"]
source_str = request.form.get("source")
source: Literal["datasets"] | None = "datasets" if source_str == "datasets" else None
@@ -58,6 +57,7 @@ class FileApi(Resource):
if len(request.files) > 1:
raise TooManyFilesError()
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError
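Note: the same reorder recurs across several upload endpoints in this compare. The old code indexed request.files["file"] before checking that a file part exists, so an empty upload raised werkzeug's generic 400 BadRequestKeyError instead of the intended NoFileUploadedError. A minimal reproduction of the corrected order:

from flask import Flask, request

app = Flask(__name__)

with app.test_request_context(method="POST", data={}):  # no multipart file part
    # Membership check first: request.files["file"] on a missing part raises
    # BadRequestKeyError (a bare 400) before any domain-specific error could fire.
    if "file" not in request.files:
        print("no file uploaded")  # raise NoFileUploadedError() in the app
    else:
        file = request.files["file"]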

View File

@@ -9,14 +9,13 @@ from configs import dify_config
from constants.languages import supported_language
from controllers.console import api
from controllers.console.auth.error import (
AccountInFreezeError,
EmailAlreadyInUseError,
EmailChangeLimitError,
EmailCodeError,
InvalidEmailError,
InvalidTokenError,
)
from controllers.console.error import AccountNotFound, EmailSendIpLimitError
from controllers.console.error import AccountInFreezeError, AccountNotFound, EmailSendIpLimitError
from controllers.console.workspace.error import (
AccountAlreadyInitedError,
CurrentPasswordIncorrectError,

View File

@@ -191,9 +191,6 @@ class WebappLogoWorkspaceApi(Resource):
@account_initialization_required
@cloud_edition_billing_resource_check("workspace_custom")
def post(self):
# get file from request
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -201,6 +198,8 @@ class WebappLogoWorkspaceApi(Resource):
if len(request.files) > 1:
raise TooManyFilesError()
# get file from request
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError

View File

@@ -6,6 +6,6 @@ bp = Blueprint("service_api", __name__, url_prefix="/v1")
api = ExternalApi(bp)
from . import index
from .app import annotation, app, audio, completion, conversation, file, message, site, workflow
from .app import annotation, app, audio, completion, conversation, file, file_preview, message, site, workflow
from .dataset import dataset, document, hit_testing, metadata, segment, upload_file
from .workspace import models

View File

@@ -2,7 +2,7 @@ import logging
from flask import request
from flask_restful import Resource, reqparse
from werkzeug.exceptions import InternalServerError, NotFound
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
import services
from controllers.service_api import api
@@ -30,6 +30,7 @@ from libs import helper
from libs.helper import uuid_value
from models.model import App, AppMode, EndUser
from services.app_generate_service import AppGenerateService
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
from services.errors.llm import InvokeRateLimitError
@@ -47,6 +48,9 @@ class CompletionApi(Resource):
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
args = parser.parse_args()
external_trace_id = get_external_trace_id(request)
if external_trace_id:
args["external_trace_id"] = external_trace_id
streaming = args["response_mode"] == "streaming"
@@ -110,7 +114,7 @@ class ChatApi(Resource):
parser.add_argument("conversation_id", type=uuid_value, location="json")
parser.add_argument("retriever_from", type=str, required=False, default="dev", location="json")
parser.add_argument("auto_generate_name", type=bool, required=False, default=True, location="json")
parser.add_argument("workflow_id", type=str, required=False, location="json")
args = parser.parse_args()
external_trace_id = get_external_trace_id(request)
@@ -125,6 +129,12 @@ class ChatApi(Resource):
)
return helper.compact_generate_response(response)
except WorkflowNotFoundError as ex:
raise NotFound(str(ex))
except IsDraftWorkflowError as ex:
raise BadRequest(str(ex))
except WorkflowIdFormatError as ex:
raise BadRequest(str(ex))
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:

View File

@@ -1,7 +1,9 @@
import json
from flask_restful import Resource, marshal_with, reqparse
from flask_restful.inputs import int_range
from sqlalchemy.orm import Session
from werkzeug.exceptions import NotFound
from werkzeug.exceptions import BadRequest, NotFound
import services
from controllers.service_api import api
@@ -15,6 +17,7 @@ from fields.conversation_fields import (
simple_conversation_fields,
)
from fields.conversation_variable_fields import (
conversation_variable_fields,
conversation_variable_infinite_scroll_pagination_fields,
)
from libs.helper import uuid_value
@@ -120,7 +123,41 @@ class ConversationVariablesApi(Resource):
raise NotFound("Conversation Not Exists.")
class ConversationVariableDetailApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
@marshal_with(conversation_variable_fields)
def put(self, app_model: App, end_user: EndUser, c_id, variable_id):
"""Update a conversation variable's value"""
app_mode = AppMode.value_of(app_model.mode)
if app_mode not in {AppMode.CHAT, AppMode.AGENT_CHAT, AppMode.ADVANCED_CHAT}:
raise NotChatAppError()
conversation_id = str(c_id)
variable_id = str(variable_id)
parser = reqparse.RequestParser()
parser.add_argument("value", required=True, location="json")
args = parser.parse_args()
try:
return ConversationService.update_conversation_variable(
app_model, conversation_id, variable_id, end_user, json.loads(args["value"])
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationVariableNotExistsError:
raise NotFound("Conversation Variable Not Exists.")
except services.errors.conversation.ConversationVariableTypeMismatchError as e:
raise BadRequest(str(e))
api.add_resource(ConversationRenameApi, "/conversations/<uuid:c_id>/name", endpoint="conversation_name")
api.add_resource(ConversationApi, "/conversations")
api.add_resource(ConversationDetailApi, "/conversations/<uuid:c_id>", endpoint="conversation_detail")
api.add_resource(ConversationVariablesApi, "/conversations/<uuid:c_id>/variables", endpoint="conversation_variables")
api.add_resource(
ConversationVariableDetailApi,
"/conversations/<uuid:c_id>/variables/<uuid:variable_id>",
endpoint="conversation_variable_detail",
methods=["PUT"],
)
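Note: a hedged client sketch for the new endpoint (Service API base URL, ids, and key are placeholders). The handler json.loads() the "value" argument, so the client sends the new value as a JSON-encoded string:

import json
import requests

BASE = "https://api.dify.ai/v1"  # placeholder Service API base
headers = {"Authorization": "Bearer app-<key>", "Content-Type": "application/json"}

resp = requests.put(
    f"{BASE}/conversations/<conversation-uuid>/variables/<variable-uuid>",
    headers=headers,
    # json.dumps encodes the value: json.dumps(42) -> "42", json.dumps("hi") -> '"hi"'.
    json={"value": json.dumps(42), "user": "end-user-1"},
)
print(resp.status_code)  # 404 for unknown ids, 400 on a type mismatch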

View File

@@ -107,3 +107,15 @@ class UnsupportedFileTypeError(BaseHTTPException):
error_code = "unsupported_file_type"
description = "File type not allowed."
code = 415
class FileNotFoundError(BaseHTTPException):
error_code = "file_not_found"
description = "The requested file was not found."
code = 404
class FileAccessDeniedError(BaseHTTPException):
error_code = "file_access_denied"
description = "Access to the requested file is denied."
code = 403

View File

@@ -20,18 +20,17 @@ class FileApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
@marshal_with(file_fields)
def post(self, app_model: App, end_user: EndUser):
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()
if not file.mimetype:
raise UnsupportedFileTypeError()
if len(request.files) > 1:
raise TooManyFilesError()
file = request.files["file"]
if not file.mimetype:
raise UnsupportedFileTypeError()
if not file.filename:
raise FilenameNotExistsError

View File

@@ -0,0 +1,186 @@
import logging
from urllib.parse import quote
from flask import Response
from flask_restful import Resource, reqparse
from controllers.service_api import api
from controllers.service_api.app.error import (
FileAccessDeniedError,
FileNotFoundError,
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.model import App, EndUser, Message, MessageFile, UploadFile
logger = logging.getLogger(__name__)
class FilePreviewApi(Resource):
"""
Service API File Preview endpoint
Provides secure file preview/download functionality for external API users.
Files can only be accessed if they belong to messages within the requesting app's context.
"""
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.QUERY))
def get(self, app_model: App, end_user: EndUser, file_id: str):
"""
Preview/Download a file that was uploaded via Service API
Args:
app_model: The authenticated app model
end_user: The authenticated end user (optional)
file_id: UUID of the file to preview
Query Parameters:
user: Optional user identifier
as_attachment: Boolean, whether to download as attachment (default: false)
Returns:
Stream response with file content
Raises:
FileNotFoundError: File does not exist
FileAccessDeniedError: File access denied (not owned by app)
"""
file_id = str(file_id)
# Parse query parameters
parser = reqparse.RequestParser()
parser.add_argument("as_attachment", type=bool, required=False, default=False, location="args")
args = parser.parse_args()
# Validate file ownership and get file objects
message_file, upload_file = self._validate_file_ownership(file_id, app_model.id)
# Get file content generator
try:
generator = storage.load(upload_file.key, stream=True)
except Exception as e:
raise FileNotFoundError(f"Failed to load file content: {str(e)}")
# Build response with appropriate headers
response = self._build_file_response(generator, upload_file, args["as_attachment"])
return response
def _validate_file_ownership(self, file_id: str, app_id: str) -> tuple[MessageFile, UploadFile]:
"""
Validate that the file belongs to a message within the requesting app's context
Security validations performed:
1. File exists in MessageFile table (was used in a conversation)
2. Message belongs to the requesting app
3. UploadFile record exists and is accessible
4. File tenant matches app tenant (additional security layer)
Args:
file_id: UUID of the file to validate
app_id: UUID of the requesting app
Returns:
Tuple of (MessageFile, UploadFile) if validation passes
Raises:
FileNotFoundError: File or related records not found
FileAccessDeniedError: File does not belong to the app's context
"""
try:
# Input validation
if not file_id or not app_id:
raise FileAccessDeniedError("Invalid file or app identifier")
# First, find the MessageFile that references this upload file
message_file = db.session.query(MessageFile).where(MessageFile.upload_file_id == file_id).first()
if not message_file:
raise FileNotFoundError("File not found in message context")
# Get the message and verify it belongs to the requesting app
message = (
db.session.query(Message).where(Message.id == message_file.message_id, Message.app_id == app_id).first()
)
if not message:
raise FileAccessDeniedError("File access denied: not owned by requesting app")
# Get the actual upload file record
upload_file = db.session.query(UploadFile).where(UploadFile.id == file_id).first()
if not upload_file:
raise FileNotFoundError("Upload file record not found")
# Additional security: verify tenant isolation
app = db.session.query(App).where(App.id == app_id).first()
if app and upload_file.tenant_id != app.tenant_id:
raise FileAccessDeniedError("File access denied: tenant mismatch")
return message_file, upload_file
except (FileNotFoundError, FileAccessDeniedError):
# Re-raise our custom exceptions
raise
except Exception as e:
# Log unexpected errors for debugging
logger.exception(
"Unexpected error during file ownership validation",
extra={"file_id": file_id, "app_id": app_id, "error": str(e)},
)
raise FileAccessDeniedError("File access validation failed")
def _build_file_response(self, generator, upload_file: UploadFile, as_attachment: bool = False) -> Response:
"""
Build Flask Response object with appropriate headers for file streaming
Args:
generator: File content generator from storage
upload_file: UploadFile database record
as_attachment: Whether to set Content-Disposition as attachment
Returns:
Flask Response object with streaming file content
"""
response = Response(
generator,
mimetype=upload_file.mime_type,
direct_passthrough=True,
headers={},
)
# Add Content-Length if known
if upload_file.size and upload_file.size > 0:
response.headers["Content-Length"] = str(upload_file.size)
# Add Accept-Ranges header for audio/video files to support seeking
if upload_file.mime_type in [
"audio/mpeg",
"audio/wav",
"audio/mp4",
"audio/ogg",
"audio/flac",
"audio/aac",
"video/mp4",
"video/webm",
"video/quicktime",
"audio/x-m4a",
]:
response.headers["Accept-Ranges"] = "bytes"
# Set Content-Disposition for downloads
if as_attachment and upload_file.name:
encoded_filename = quote(upload_file.name)
response.headers["Content-Disposition"] = f"attachment; filename*=UTF-8''{encoded_filename}"
# Override content-type for downloads to force download
response.headers["Content-Type"] = "application/octet-stream"
# Add caching headers for performance
response.headers["Cache-Control"] = "public, max-age=3600" # Cache for 1 hour
return response
# Register the API endpoint
api.add_resource(FilePreviewApi, "/files/<uuid:file_id>/preview")
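Note: a hedged usage sketch for the new preview route (base URL, key, and file id are placeholders). The app token is validated per request, and as_attachment=true switches the response to a forced download:

import requests

BASE = "https://api.dify.ai/v1"  # placeholder Service API base
headers = {"Authorization": "Bearer app-<key>"}

# Inline preview: streamed with the file's own MIME type.
resp = requests.get(f"{BASE}/files/<file-uuid>/preview", headers=headers, stream=True)
print(resp.headers.get("Content-Type"))

# Forced download: Content-Disposition attachment + application/octet-stream.
resp = requests.get(
    f"{BASE}/files/<file-uuid>/preview",
    params={"as_attachment": "true"},
    headers=headers,
    stream=True,
)
with open("download.bin", "wb") as f:
    for chunk in resp.iter_content(8192):
        f.write(chunk)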

View File

@@ -15,7 +15,11 @@ from fields.message_fields import agent_thought_fields, feedback_fields
from fields.raws import FilesContainedField
from libs.helper import TimestampField, uuid_value
from models.model import App, AppMode, EndUser
from services.errors.message import SuggestedQuestionsAfterAnswerDisabledError
from services.errors.message import (
FirstMessageNotExistsError,
MessageNotExistsError,
SuggestedQuestionsAfterAnswerDisabledError,
)
from services.message_service import MessageService
@@ -65,7 +69,7 @@ class MessageListApi(Resource):
)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.message.FirstMessageNotExistsError:
except FirstMessageNotExistsError:
raise NotFound("First Message Not Exists.")
@@ -87,7 +91,7 @@ class MessageFeedbackApi(Resource):
rating=args.get("rating"),
content=args.get("content"),
)
except services.errors.message.MessageNotExistsError:
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
return {"result": "success"}
@@ -117,7 +121,7 @@ class MessageSuggestedApi(Resource):
questions = MessageService.get_suggested_questions_after_answer(
app_model=app_model, user=end_user, message_id=message_id, invoke_from=InvokeFrom.SERVICE_API
)
except services.errors.message.MessageNotExistsError:
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
except SuggestedQuestionsAfterAnswerDisabledError:
raise BadRequest("Suggested Questions Is Disabled.")

View File

@@ -5,7 +5,7 @@ from flask import request
from flask_restful import Resource, fields, marshal_with, reqparse
from flask_restful.inputs import int_range
from sqlalchemy.orm import Session, sessionmaker
from werkzeug.exceptions import InternalServerError
from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
from controllers.service_api import api
from controllers.service_api.app.error import (
@@ -34,6 +34,7 @@ from libs.helper import TimestampField
from models.model import App, AppMode, EndUser
from repositories.factory import DifyAPIRepositoryFactory
from services.app_generate_service import AppGenerateService
from services.errors.app import IsDraftWorkflowError, WorkflowIdFormatError, WorkflowNotFoundError
from services.errors.llm import InvokeRateLimitError
from services.workflow_app_service import WorkflowAppService
@@ -120,6 +121,59 @@ class WorkflowRunApi(Resource):
raise InternalServerError()
class WorkflowRunByIdApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
def post(self, app_model: App, end_user: EndUser, workflow_id: str):
"""
Run specific workflow by ID
"""
app_mode = AppMode.value_of(app_model.mode)
if app_mode != AppMode.WORKFLOW:
raise NotWorkflowAppError()
parser = reqparse.RequestParser()
parser.add_argument("inputs", type=dict, required=True, nullable=False, location="json")
parser.add_argument("files", type=list, required=False, location="json")
parser.add_argument("response_mode", type=str, choices=["blocking", "streaming"], location="json")
args = parser.parse_args()
# Add workflow_id to args for AppGenerateService
args["workflow_id"] = workflow_id
external_trace_id = get_external_trace_id(request)
if external_trace_id:
args["external_trace_id"] = external_trace_id
streaming = args.get("response_mode") == "streaming"
try:
response = AppGenerateService.generate(
app_model=app_model, user=end_user, args=args, invoke_from=InvokeFrom.SERVICE_API, streaming=streaming
)
return helper.compact_generate_response(response)
except WorkflowNotFoundError as ex:
raise NotFound(str(ex))
except IsDraftWorkflowError as ex:
raise BadRequest(str(ex))
except WorkflowIdFormatError as ex:
raise BadRequest(str(ex))
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except InvokeRateLimitError as ex:
raise InvokeRateLimitHttpError(ex.description)
except InvokeError as e:
raise CompletionRequestError(e.description)
except ValueError as e:
raise e
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
class WorkflowTaskStopApi(Resource):
@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON, required=True))
def post(self, app_model: App, end_user: EndUser, task_id: str):
@@ -193,5 +247,6 @@ class WorkflowAppLogApi(Resource):
api.add_resource(WorkflowRunApi, "/workflows/run")
api.add_resource(WorkflowRunDetailApi, "/workflows/run/<string:workflow_run_id>")
api.add_resource(WorkflowRunByIdApi, "/workflows/<string:workflow_id>/run")
api.add_resource(WorkflowTaskStopApi, "/workflows/tasks/<string:task_id>/stop")
api.add_resource(WorkflowAppLogApi, "/workflows/logs")
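Note: a hedged client sketch for the new route, which runs a specific workflow version instead of the app's current default (placeholders throughout):

import requests

BASE = "https://api.dify.ai/v1"  # placeholder Service API base
headers = {"Authorization": "Bearer app-<key>"}

resp = requests.post(
    f"{BASE}/workflows/<workflow-uuid>/run",  # vs. /workflows/run for the default
    headers=headers,
    json={
        "inputs": {"query": "hello"},  # required; may be {}
        "response_mode": "blocking",   # or "streaming"
        "user": "end-user-1",
    },
)
print(resp.status_code, resp.json())
# Per the handlers above: draft workflows and malformed ids return 400, unknown ids 404.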

View File

@@ -29,7 +29,7 @@ def _validate_name(name):
def _validate_description_length(description):
if len(description) > 400:
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
@@ -87,7 +87,7 @@ class DatasetListApi(DatasetApiResource):
)
parser.add_argument(
"description",
type=str,
type=_validate_description_length,
nullable=True,
required=False,
default="",

View File

@@ -234,8 +234,6 @@ class DocumentAddByFileApi(DatasetApiResource):
args["retrieval_model"].get("reranking_model").get("reranking_model_name"),
)
# save file info
file = request.files["file"]
# check file
if "file" not in request.files:
raise NoFileUploadedError()
@@ -243,6 +241,8 @@ class DocumentAddByFileApi(DatasetApiResource):
if len(request.files) > 1:
raise TooManyFilesError()
# save file info
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError
@@ -358,39 +358,6 @@ class DocumentUpdateByFileApi(DatasetApiResource):
return documents_and_batch_fields, 200
class DocumentDeleteApi(DatasetApiResource):
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def delete(self, tenant_id, dataset_id, document_id):
"""Delete document."""
document_id = str(document_id)
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
# get dataset info
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("Dataset does not exist.")
document = DocumentService.get_document(dataset.id, document_id)
# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")
# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()
try:
# delete document
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return 204
class DocumentListApi(DatasetApiResource):
def get(self, tenant_id, dataset_id):
dataset_id = str(dataset_id)
@@ -473,7 +440,7 @@ class DocumentIndexingStatusApi(DatasetApiResource):
return data
class DocumentDetailApi(DatasetApiResource):
class DocumentApi(DatasetApiResource):
METADATA_CHOICES = {"all", "only", "without"}
def get(self, tenant_id, dataset_id, document_id):
@@ -567,6 +534,37 @@ class DocumentDetailApi(DatasetApiResource):
return response
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def delete(self, tenant_id, dataset_id, document_id):
"""Delete document."""
document_id = str(document_id)
dataset_id = str(dataset_id)
tenant_id = str(tenant_id)
# get dataset info
dataset = db.session.query(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).first()
if not dataset:
raise ValueError("Dataset does not exist.")
document = DocumentService.get_document(dataset.id, document_id)
# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")
# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()
try:
# delete document
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return 204
api.add_resource(
DocumentAddByTextApi,
@@ -588,7 +586,6 @@ api.add_resource(
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
)
api.add_resource(DocumentDeleteApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
api.add_resource(DocumentListApi, "/datasets/<uuid:dataset_id>/documents")
api.add_resource(DocumentIndexingStatusApi, "/datasets/<uuid:dataset_id>/documents/<string:batch>/indexing-status")
api.add_resource(DocumentDetailApi, "/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")

View File

@@ -17,8 +17,8 @@ class DatasetMetadataCreateServiceApi(DatasetApiResource):
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id, dataset_id):
parser = reqparse.RequestParser()
parser.add_argument("type", type=str, required=True, nullable=True, location="json")
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
parser.add_argument("type", type=str, required=True, nullable=False, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
metadata_args = MetadataArgs(**args)
@@ -43,7 +43,7 @@ class DatasetMetadataServiceApi(DatasetApiResource):
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, tenant_id, dataset_id, metadata_id):
parser = reqparse.RequestParser()
parser.add_argument("name", type=str, required=True, nullable=True, location="json")
parser.add_argument("name", type=str, required=True, nullable=False, location="json")
args = parser.parse_args()
dataset_id_str = str(dataset_id)
@@ -101,7 +101,7 @@ class DocumentMetadataEditServiceApi(DatasetApiResource):
DatasetService.check_dataset_permission(dataset, current_user)
parser = reqparse.RequestParser()
parser.add_argument("operation_data", type=list, required=True, nullable=True, location="json")
parser.add_argument("operation_data", type=list, required=True, nullable=False, location="json")
args = parser.parse_args()
metadata_args = MetadataOperationData(**args)

View File

@@ -1,5 +1,6 @@
from flask import request
from flask_restful import Resource, marshal_with, reqparse
from werkzeug.exceptions import Unauthorized
from controllers.common import fields
from controllers.web import api
@@ -75,14 +76,14 @@ class AppWebAuthPermission(Resource):
try:
auth_header = request.headers.get("Authorization")
if auth_header is None:
raise
raise Unauthorized("Authorization header is missing.")
if " " not in auth_header:
raise
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, tk = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise
raise Unauthorized("Authorization scheme must be 'Bearer'")
decoded = PassportService().verify(tk)
user_id = decoded.get("user_id", "visitor")
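Note: the bare raise statements replaced here executed inside the try block, not an except handler, so at runtime they produced RuntimeError("No active exception to re-raise") rather than a meaningful auth error. A two-line reproduction of the failure mode:

try:
    auth_header = None
    if auth_header is None:
        raise  # no active exception -> RuntimeError, not an HTTP 401
except RuntimeError as e:
    print(e)  # No active exception to re-raise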

View File

@@ -12,18 +12,17 @@ from services.file_service import FileService
class FileApi(WebApiResource):
@marshal_with(file_fields)
def post(self, app_model, end_user):
file = request.files["file"]
source = request.form.get("source")
if "file" not in request.files:
raise NoFileUploadedError()
if len(request.files) > 1:
raise TooManyFilesError()
file = request.files["file"]
if not file.filename:
raise FilenameNotExistsError
source = request.form.get("source")
if source not in ("datasets", None):
source = None

View File

@@ -4,7 +4,6 @@ from flask_restful import fields, marshal_with, reqparse
from flask_restful.inputs import int_range
from werkzeug.exceptions import InternalServerError, NotFound
import services
from controllers.web import api
from controllers.web.error import (
AppMoreLikeThisDisabledError,
@@ -29,7 +28,11 @@ from models.model import AppMode
from services.app_generate_service import AppGenerateService
from services.errors.app import MoreLikeThisDisabledError
from services.errors.conversation import ConversationNotExistsError
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
from services.errors.message import (
FirstMessageNotExistsError,
MessageNotExistsError,
SuggestedQuestionsAfterAnswerDisabledError,
)
from services.message_service import MessageService
@@ -73,9 +76,9 @@ class MessageListApi(WebApiResource):
return MessageService.pagination_by_first_id(
app_model, end_user, args["conversation_id"], args["first_id"], args["limit"]
)
except services.errors.conversation.ConversationNotExistsError:
except ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.message.FirstMessageNotExistsError:
except FirstMessageNotExistsError:
raise NotFound("First Message Not Exists.")
@@ -96,7 +99,7 @@ class MessageFeedbackApi(WebApiResource):
rating=args.get("rating"),
content=args.get("content"),
)
except services.errors.message.MessageNotExistsError:
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
return {"result": "success"}

View File

@@ -148,6 +148,8 @@ SupportedComparisonOperator = Literal[
"is not",
"empty",
"not empty",
"in",
"not in",
# for number
"=",
"",

View File

@@ -118,26 +118,8 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
):
return
# Init conversation variables
stmt = select(ConversationVariable).where(
ConversationVariable.app_id == self.conversation.app_id,
ConversationVariable.conversation_id == self.conversation.id,
)
with Session(db.engine) as session:
db_conversation_variables = session.scalars(stmt).all()
if not db_conversation_variables:
# Create conversation variables if they don't exist.
db_conversation_variables = [
ConversationVariable.from_variable(
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
)
for variable in self._workflow.conversation_variables
]
session.add_all(db_conversation_variables)
# Convert database entities to variables.
conversation_variables = [item.to_variable() for item in db_conversation_variables]
session.commit()
# Initialize conversation variables
conversation_variables = self._initialize_conversation_variables()
# Create a variable pool.
system_inputs = SystemVariable(
@@ -292,3 +274,100 @@ class AdvancedChatAppRunner(WorkflowBasedAppRunner):
message_id=message_id,
trace_manager=app_generate_entity.trace_manager,
)
def _initialize_conversation_variables(self) -> list[VariableUnion]:
"""
Initialize conversation variables for the current conversation.
This method:
1. Loads existing variables from the database
2. Creates new variables if none exist
3. Syncs missing variables from the workflow definition
:return: List of conversation variables ready for use
"""
with Session(db.engine) as session:
existing_variables = self._load_existing_conversation_variables(session)
if not existing_variables:
# First time initialization - create all variables
existing_variables = self._create_all_conversation_variables(session)
else:
# Check and add any missing variables from the workflow
existing_variables = self._sync_missing_conversation_variables(session, existing_variables)
# Convert to Variable objects for use in the workflow
conversation_variables = [var.to_variable() for var in existing_variables]
session.commit()
return cast(list[VariableUnion], conversation_variables)
def _load_existing_conversation_variables(self, session: Session) -> list[ConversationVariable]:
"""
Load existing conversation variables from the database.
:param session: Database session
:return: List of existing conversation variables
"""
stmt = select(ConversationVariable).where(
ConversationVariable.app_id == self.conversation.app_id,
ConversationVariable.conversation_id == self.conversation.id,
)
return list(session.scalars(stmt).all())
def _create_all_conversation_variables(self, session: Session) -> list[ConversationVariable]:
"""
Create all conversation variables for a new conversation.
:param session: Database session
:return: List of created conversation variables
"""
new_variables = [
ConversationVariable.from_variable(
app_id=self.conversation.app_id, conversation_id=self.conversation.id, variable=variable
)
for variable in self._workflow.conversation_variables
]
if new_variables:
session.add_all(new_variables)
return new_variables
def _sync_missing_conversation_variables(
self, session: Session, existing_variables: list[ConversationVariable]
) -> list[ConversationVariable]:
"""
Sync missing conversation variables from the workflow definition.
This handles the case where new variables are added to a workflow
after conversations have already been created.
:param session: Database session
:param existing_variables: List of existing conversation variables
:return: Updated list including any newly created variables
"""
# Get IDs of existing and workflow variables
existing_ids = {var.id for var in existing_variables}
workflow_variables = {var.id: var for var in self._workflow.conversation_variables}
# Find missing variable IDs
missing_ids = set(workflow_variables.keys()) - existing_ids
if not missing_ids:
return existing_variables
# Create missing variables with their default values
new_variables = [
ConversationVariable.from_variable(
app_id=self.conversation.app_id,
conversation_id=self.conversation.id,
variable=workflow_variables[var_id],
)
for var_id in missing_ids
]
session.add_all(new_variables)
# Return combined list
return existing_variables + new_variables
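Taken together, these helpers read existing rows, create the full set on first contact, and backfill only the missing IDs afterwards. A minimal sketch of the same set-difference backfill, using hypothetical `WorkflowVar`/`StoredVar` dataclasses in place of the ORM entities:

```python
# Minimal sketch of the sync logic above, outside SQLAlchemy.
# WorkflowVar / StoredVar are hypothetical stand-ins for the real entities.
from dataclasses import dataclass

@dataclass
class WorkflowVar:
    id: str
    default: str

@dataclass
class StoredVar:
    id: str
    value: str

def sync_missing(workflow_vars: list[WorkflowVar], stored: list[StoredVar]) -> list[StoredVar]:
    existing_ids = {v.id for v in stored}
    by_id = {v.id: v for v in workflow_vars}
    missing_ids = set(by_id) - existing_ids
    # Missing variables are created from their workflow defaults,
    # mirroring ConversationVariable.from_variable above.
    return stored + [StoredVar(id=i, value=by_id[i].default) for i in sorted(missing_ids)]

# A conversation created before "topic" was added to the workflow:
stored = [StoredVar("lang", "en")]
wf = [WorkflowVar("lang", "en"), WorkflowVar("topic", "")]
assert {v.id for v in sync_missing(wf, stored)} == {"lang", "topic"}
```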

View File

@@ -9,7 +9,6 @@ from core.app.app_config.entities import EasyUIBasedAppConfig, WorkflowUIBasedAp
from core.entities.provider_configuration import ProviderModelBundle
from core.file import File, FileUploadConfig
from core.model_runtime.entities.model_entities import AIModelEntity
from core.ops.ops_trace_manager import TraceQueueManager
class InvokeFrom(Enum):
@@ -114,7 +113,8 @@ class AppGenerateEntity(BaseModel):
extras: dict[str, Any] = Field(default_factory=dict)
# tracing instance
trace_manager: Optional[TraceQueueManager] = None
# Using Any to avoid circular import with TraceQueueManager
trace_manager: Optional[Any] = None
class EasyUIBasedAppGenerateEntity(AppGenerateEntity):
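Falling back to `Any` removes the runtime import cycle at the cost of static typing. Pydantic models evaluate their field annotations when the model class is built, which is presumably why a string annotation alone was not enough here; for plain classes, the usual alternative is a `TYPE_CHECKING` guard, sketched below:

```python
# Sketch: keep the static type without a runtime import cycle.
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Imported only while type checking, so nothing is executed at runtime.
    from core.ops.ops_trace_manager import TraceQueueManager

@dataclass
class Entity:
    # The annotation stays a lazy string thanks to the __future__ import.
    trace_manager: Optional[TraceQueueManager] = None
```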

View File

@@ -23,6 +23,7 @@ from core.app.entities.task_entities import (
MessageFileStreamResponse,
MessageReplaceStreamResponse,
MessageStreamResponse,
StreamEvent,
WorkflowTaskState,
)
from core.llm_generator.llm_generator import LLMGenerator
@@ -180,11 +181,15 @@ class MessageCycleManager:
:param message_id: message id
:return:
"""
message_file = db.session.query(MessageFile).filter(MessageFile.id == message_id).first()
event_type = StreamEvent.MESSAGE_FILE if message_file else StreamEvent.MESSAGE
return MessageStreamResponse(
task_id=self._application_generate_entity.task_id,
id=message_id,
answer=answer,
from_variable_selector=from_variable_selector,
event=event_type,
)
def message_replace_to_stream_response(self, answer: str, reason: str = "") -> MessageReplaceStreamResponse:

View File

@@ -843,7 +843,7 @@ class ProviderConfiguration(BaseModel):
continue
status = ModelStatus.ACTIVE
if m.model in model_setting_map:
if m.model_type in model_setting_map and m.model in model_setting_map[m.model_type]:
model_setting = model_setting_map[m.model_type][m.model]
if model_setting.enabled is False:
status = ModelStatus.DISABLED

View File

@@ -176,7 +176,7 @@ class ProviderConfig(BasicProviderConfig):
scope: AppSelectorScope | ModelSelectorScope | ToolSelectorScope | None = None
required: bool = False
default: Optional[Union[int, str]] = None
default: Optional[Union[int, str, float, bool]] = None
options: Optional[list[Option]] = None
label: Optional[I18nObject] = None
help: Optional[I18nObject] = None

View File

@@ -32,7 +32,7 @@ def get_attr(*, file: File, attr: FileAttribute):
case FileAttribute.TRANSFER_METHOD:
return file.transfer_method.value
case FileAttribute.URL:
return file.remote_url
return _to_url(file)
case FileAttribute.EXTENSION:
return file.extension
case FileAttribute.RELATED_ID:

View File

@@ -121,9 +121,8 @@ class TokenBufferMemory:
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
if curr_message_tokens > max_token_limit:
pruned_memory = []
while curr_message_tokens > max_token_limit and len(prompt_messages) > 1:
pruned_memory.append(prompt_messages.pop(0))
prompt_messages.pop(0)
curr_message_tokens = self.model_instance.get_llm_num_tokens(prompt_messages)
return prompt_messages
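The fix drops the unused `pruned_memory` buffer and simply pops the oldest message until the prompt fits the budget. A self-contained sketch of that pruning loop, with a whitespace word count standing in for `get_llm_num_tokens`:

```python
# Sketch of the pruning loop above; len(words) stands in for get_llm_num_tokens.
def prune_to_budget(messages: list[str], max_tokens: int) -> list[str]:
    def count(msgs: list[str]) -> int:
        return sum(len(m.split()) for m in msgs)

    # Drop the oldest message while over budget, but always keep at least one.
    while count(messages) > max_tokens and len(messages) > 1:
        messages.pop(0)
    return messages

assert prune_to_budget(["a b c", "d e", "f"], 4) == ["d e", "f"]
```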

View File

@@ -10,6 +10,7 @@ from sqlalchemy.orm import Session, sessionmaker
from core.ops.aliyun_trace.data_exporter.traceclient import (
TraceClient,
convert_datetime_to_nanoseconds,
convert_string_to_id,
convert_to_span_id,
convert_to_trace_id,
generate_span_id,
@@ -101,8 +102,9 @@ class AliyunDataTrace(BaseTraceInstance):
raise ValueError(f"Aliyun get run url failed: {str(e)}")
def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or convert_to_trace_id(trace_info.workflow_run_id)
trace_id = convert_to_trace_id(trace_info.workflow_run_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
workflow_span_id = convert_to_span_id(trace_info.workflow_run_id, "workflow")
self.add_workflow_span(trace_id, workflow_span_id, trace_info)
@@ -130,6 +132,9 @@ class AliyunDataTrace(BaseTraceInstance):
status = Status(StatusCode.ERROR, trace_info.error)
trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
message_span_id = convert_to_span_id(message_id, "message")
message_span = SpanData(
trace_id=trace_id,
@@ -186,9 +191,13 @@ class AliyunDataTrace(BaseTraceInstance):
return
message_id = trace_info.message_id
trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
documents_data = extract_retrieval_documents(trace_info.documents)
dataset_retrieval_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name="dataset_retrieval",
@@ -214,8 +223,12 @@ class AliyunDataTrace(BaseTraceInstance):
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
tool_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=generate_span_id(),
name=trace_info.tool_name,
@@ -451,8 +464,13 @@ class AliyunDataTrace(BaseTraceInstance):
status: Status = Status(StatusCode.OK)
if trace_info.error:
status = Status(StatusCode.ERROR, trace_info.error)
trace_id = convert_to_trace_id(message_id)
if trace_info.trace_id:
trace_id = convert_string_to_id(trace_info.trace_id)
suggested_question_span = SpanData(
trace_id=convert_to_trace_id(message_id),
trace_id=trace_id,
parent_span_id=convert_to_span_id(message_id, "message"),
span_id=convert_to_span_id(message_id, "suggested_question"),
name="suggested_question",

View File

@@ -181,15 +181,21 @@ def convert_to_trace_id(uuid_v4: Optional[str]) -> int:
raise ValueError(f"Invalid UUID input: {e}")
def convert_string_to_id(string: Optional[str]) -> int:
if not string:
return generate_span_id()
hash_bytes = hashlib.sha256(string.encode("utf-8")).digest()
id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return id
def convert_to_span_id(uuid_v4: Optional[str], span_type: str) -> int:
try:
uuid_obj = uuid.UUID(uuid_v4)
except Exception as e:
raise ValueError(f"Invalid UUID input: {e}")
combined_key = f"{uuid_obj.hex}-{span_type}"
hash_bytes = hashlib.sha256(combined_key.encode("utf-8")).digest()
span_id = int.from_bytes(hash_bytes[:8], byteorder="big", signed=False)
return span_id
return convert_string_to_id(combined_key)
def convert_datetime_to_nanoseconds(start_time_a: Optional[datetime]) -> Optional[int]:

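Both helpers now funnel through one deterministic mapping: SHA-256 the key, keep the first 8 bytes, and read them as an unsigned big-endian integer, with the span type salting the key. A quick standalone check of those properties:

```python
import hashlib

def string_to_id(s: str) -> int:
    # First 8 bytes of SHA-256, read as an unsigned 64-bit integer.
    return int.from_bytes(hashlib.sha256(s.encode("utf-8")).digest()[:8], "big", signed=False)

# Deterministic: the same input always yields the same ID, and the span
# type salts the key so "workflow" and "message" spans get distinct IDs.
uuid_hex = "0f8fad5bd9cb469fa16570867728950e"
assert string_to_id(f"{uuid_hex}-workflow") == string_to_id(f"{uuid_hex}-workflow")
assert string_to_id(f"{uuid_hex}-workflow") != string_to_id(f"{uuid_hex}-message")
assert string_to_id(f"{uuid_hex}-workflow") < 2**64
```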
View File

@@ -4,6 +4,7 @@ import logging
import os
from datetime import datetime, timedelta
from typing import Any, Optional, Union, cast
from urllib.parse import urlparse
from openinference.semconv.trace import OpenInferenceSpanKindValues, SpanAttributes
from opentelemetry import trace
@@ -40,8 +41,14 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
try:
# Choose the appropriate exporter based on config type
exporter: Union[GrpcOTLPSpanExporter, HttpOTLPSpanExporter]
# Inspect the provided endpoint to determine its structure
parsed = urlparse(arize_phoenix_config.endpoint)
base_endpoint = f"{parsed.scheme}://{parsed.netloc}"
path = parsed.path.rstrip("/")
if isinstance(arize_phoenix_config, ArizeConfig):
arize_endpoint = f"{arize_phoenix_config.endpoint}/v1"
arize_endpoint = f"{base_endpoint}/v1"
arize_headers = {
"api_key": arize_phoenix_config.api_key or "",
"space_id": arize_phoenix_config.space_id or "",
@@ -53,7 +60,7 @@ def setup_tracer(arize_phoenix_config: ArizeConfig | PhoenixConfig) -> tuple[tra
timeout=30,
)
else:
phoenix_endpoint = f"{arize_phoenix_config.endpoint}/v1/traces"
phoenix_endpoint = f"{base_endpoint}{path}/v1/traces"
phoenix_headers = {
"api_key": arize_phoenix_config.api_key or "",
"authorization": f"Bearer {arize_phoenix_config.api_key or ''}",
@@ -91,16 +98,21 @@ def datetime_to_nanos(dt: Optional[datetime]) -> int:
return int(dt.timestamp() * 1_000_000_000)
def uuid_to_trace_id(string: Optional[str]) -> int:
"""Convert UUID string to a valid trace ID (16-byte integer)."""
def string_to_trace_id128(string: Optional[str]) -> int:
"""
Convert any input string into a stable 128-bit integer trace ID.
This uses SHA-256 hashing and takes the first 16 bytes (128 bits) of the digest.
It's suitable for generating consistent, unique identifiers from strings.
"""
if string is None:
string = ""
hash_object = hashlib.sha256(string.encode())
# Take the first 16 bytes (128 bits) of the hash
# Take the first 16 bytes (128 bits) of the hash digest
digest = hash_object.digest()[:16]
# Convert to integer (128 bits)
# Convert to a 128-bit integer
return int.from_bytes(digest, byteorder="big")
@@ -153,8 +165,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
workflow_metadata.update(trace_info.metadata)
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or uuid_to_trace_id(trace_info.workflow_run_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.workflow_run_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,
@@ -310,7 +321,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
SpanAttributes.SESSION_ID: trace_info.message_data.conversation_id,
}
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.trace_id or trace_info.message_id)
message_span_id = RandomIdGenerator().generate_span_id()
span_context = SpanContext(
trace_id=trace_id,
@@ -406,7 +417,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,
@@ -468,7 +479,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,
@@ -521,7 +532,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,
@@ -568,7 +579,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
"tool_config": json.dumps(trace_info.tool_config, ensure_ascii=False),
}
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
tool_span_id = RandomIdGenerator().generate_span_id()
logger.info("[Arize/Phoenix] Creating tool trace with trace_id: %s, span_id: %s", trace_id, tool_span_id)
@@ -629,7 +640,7 @@ class ArizePhoenixDataTrace(BaseTraceInstance):
}
metadata.update(trace_info.metadata)
trace_id = uuid_to_trace_id(trace_info.message_id)
trace_id = string_to_trace_id128(trace_info.message_id)
span_id = RandomIdGenerator().generate_span_id()
context = SpanContext(
trace_id=trace_id,

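Throughout this file the configured endpoint is first normalized with `urlparse` into a scheme-plus-host base and an optional path before the exporter suffix is appended. A small sketch of that normalization for the Phoenix `/v1/traces` case:

```python
from urllib.parse import urlparse

def phoenix_traces_endpoint(configured: str) -> str:
    parsed = urlparse(configured)
    base = f"{parsed.scheme}://{parsed.netloc}"
    path = parsed.path.rstrip("/")
    # Trailing slashes are stripped so the suffix is appended exactly once.
    return f"{base}{path}/v1/traces"

assert phoenix_traces_endpoint("https://phoenix.example.com") == "https://phoenix.example.com/v1/traces"
assert phoenix_traces_endpoint("https://phoenix.example.com/subpath/") == "https://phoenix.example.com/subpath/v1/traces"
```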
View File

@@ -87,7 +87,7 @@ class PhoenixConfig(BaseTracingConfig):
@field_validator("endpoint")
@classmethod
def endpoint_validator(cls, v, info: ValidationInfo):
return cls.validate_endpoint_url(v, "https://app.phoenix.arize.com")
return validate_url_with_path(v, "https://app.phoenix.arize.com")
class LangfuseConfig(BaseTracingConfig):

View File

@@ -14,6 +14,7 @@ class BaseTraceInfo(BaseModel):
start_time: Optional[datetime] = None
end_time: Optional[datetime] = None
metadata: dict[str, Any]
trace_id: Optional[str] = None
@field_validator("inputs", "outputs")
@classmethod

View File

@@ -67,14 +67,13 @@ class LangFuseDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)
def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.workflow_run_id
user_id = trace_info.metadata.get("user_id")
metadata = trace_info.metadata
metadata["workflow_app_log_id"] = trace_info.workflow_app_log_id
if trace_info.message_id:
trace_id = external_trace_id or trace_info.message_id
trace_id = trace_info.trace_id or trace_info.message_id
name = TraceTaskName.MESSAGE_TRACE.value
trace_data = LangfuseTrace(
id=trace_id,
@@ -250,8 +249,10 @@ class LangFuseDataTrace(BaseTraceInstance):
user_id = end_user_data.session_id
metadata["user_id"] = user_id
trace_id = trace_info.trace_id or message_id
trace_data = LangfuseTrace(
id=message_id,
id=trace_id,
user_id=user_id,
name=TraceTaskName.MESSAGE_TRACE.value,
input={
@@ -285,7 +286,7 @@ class LangFuseDataTrace(BaseTraceInstance):
langfuse_generation_data = LangfuseGeneration(
name="llm",
trace_id=message_id,
trace_id=trace_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
model=message_data.model_id,
@@ -311,7 +312,7 @@ class LangFuseDataTrace(BaseTraceInstance):
"preset_response": trace_info.preset_response,
"inputs": trace_info.inputs,
},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.created_at,
metadata=trace_info.metadata,
@@ -334,7 +335,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
input=trace_info.inputs,
output=str(trace_info.suggested_question),
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,
@@ -352,7 +353,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
input=trace_info.inputs,
output={"documents": trace_info.documents},
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time or trace_info.message_data.created_at,
end_time=trace_info.end_time or trace_info.message_data.updated_at,
metadata=trace_info.metadata,
@@ -365,7 +366,7 @@ class LangFuseDataTrace(BaseTraceInstance):
name=trace_info.tool_name,
input=trace_info.tool_inputs,
output=trace_info.tool_outputs,
trace_id=trace_info.message_id,
trace_id=trace_info.trace_id or trace_info.message_id,
start_time=trace_info.start_time,
end_time=trace_info.end_time,
metadata=trace_info.metadata,

View File

@@ -65,8 +65,7 @@ class LangSmithDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)
def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()
message_dotted_order = (
@@ -290,7 +289,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
parent_run_id=None,
)
@@ -319,7 +318,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
id=str(uuid.uuid4()),
)
@@ -351,7 +350,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],
@@ -381,7 +380,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],
@@ -410,7 +409,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],
@@ -440,7 +439,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error=trace_info.error or "",
)
@@ -465,7 +464,7 @@ class LangSmithDataTrace(BaseTraceInstance):
reference_example_id=None,
input_attachments={},
output_attachments={},
trace_id=None,
trace_id=trace_info.trace_id,
dotted_order=None,
error="",
file_list=[],

View File

@@ -96,8 +96,7 @@ class OpikDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)
def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
dify_trace_id = external_trace_id or trace_info.workflow_run_id
dify_trace_id = trace_info.trace_id or trace_info.workflow_run_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
workflow_metadata = wrap_metadata(
trace_info.metadata, message_id=trace_info.message_id, workflow_app_log_id=trace_info.workflow_app_log_id
@@ -105,7 +104,7 @@ class OpikDataTrace(BaseTraceInstance):
root_span_id = None
if trace_info.message_id:
dify_trace_id = external_trace_id or trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id
opik_trace_id = prepare_opik_uuid(trace_info.start_time, dify_trace_id)
trace_data = {
@@ -276,7 +275,7 @@ class OpikDataTrace(BaseTraceInstance):
return
metadata = trace_info.metadata
message_id = trace_info.message_id
dify_trace_id = trace_info.trace_id or trace_info.message_id
user_id = message_data.from_account_id
metadata["user_id"] = user_id
@@ -291,7 +290,7 @@ class OpikDataTrace(BaseTraceInstance):
metadata["end_user_id"] = end_user_id
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, message_id),
"id": prepare_opik_uuid(trace_info.start_time, dify_trace_id),
"name": TraceTaskName.MESSAGE_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,
@@ -330,7 +329,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.MODERATION_TRACE.value,
"type": "tool",
"start_time": start_time,
@@ -356,7 +355,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.SUGGESTED_QUESTION_TRACE.value,
"type": "tool",
"start_time": start_time,
@@ -376,7 +375,7 @@ class OpikDataTrace(BaseTraceInstance):
start_time = trace_info.start_time or trace_info.message_data.created_at
span_data = {
"trace_id": prepare_opik_uuid(start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.DATASET_RETRIEVAL_TRACE.value,
"type": "tool",
"start_time": start_time,
@@ -391,7 +390,7 @@ class OpikDataTrace(BaseTraceInstance):
def tool_trace(self, trace_info: ToolTraceInfo):
span_data = {
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"trace_id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": trace_info.tool_name,
"type": "tool",
"start_time": trace_info.start_time,
@@ -406,7 +405,7 @@ class OpikDataTrace(BaseTraceInstance):
def generate_name_trace(self, trace_info: GenerateNameTraceInfo):
trace_data = {
"id": prepare_opik_uuid(trace_info.start_time, trace_info.message_id),
"id": prepare_opik_uuid(trace_info.start_time, trace_info.trace_id or trace_info.message_id),
"name": TraceTaskName.GENERATE_NAME_TRACE.value,
"start_time": trace_info.start_time,
"end_time": trace_info.end_time,

View File

@@ -322,7 +322,7 @@ class OpsTraceManager:
:return:
"""
# auth check
if enabled == True:
if enabled:
try:
provider_config_map[tracing_provider]
except KeyError:
@@ -422,8 +422,11 @@ class TraceTask:
self.timer = timer
self.file_base_url = os.getenv("FILES_URL", "http://127.0.0.1:5001")
self.app_id = None
self.trace_id = None
self.kwargs = kwargs
external_trace_id = kwargs.get("external_trace_id")
if external_trace_id:
self.trace_id = external_trace_id
def execute(self):
return self.preprocess()
@@ -520,11 +523,8 @@ class TraceTask:
"app_id": workflow_run.app_id,
}
external_trace_id = self.kwargs.get("external_trace_id")
if external_trace_id:
metadata["external_trace_id"] = external_trace_id
workflow_trace_info = WorkflowTraceInfo(
trace_id=self.trace_id,
workflow_data=workflow_run.to_dict(),
conversation_id=conversation_id,
workflow_id=workflow_id,
@@ -584,6 +584,7 @@ class TraceTask:
message_tokens = message_data.message_tokens
message_trace_info = MessageTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
conversation_model=conversation_mode,
@@ -627,6 +628,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None
moderation_trace_info = ModerationTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
inputs=inputs,
message_data=message_data.to_dict(),
@@ -667,6 +669,7 @@ class TraceTask:
workflow_app_log_id = str(workflow_app_log_data.id) if workflow_app_log_data else None
suggested_question_trace_info = SuggestedQuestionTraceInfo(
trace_id=self.trace_id,
message_id=workflow_app_log_id or message_id,
message_data=message_data.to_dict(),
inputs=message_data.message,
@@ -708,6 +711,7 @@ class TraceTask:
}
dataset_retrieval_trace_info = DatasetRetrievalTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
inputs=message_data.query or message_data.inputs,
documents=[doc.model_dump() for doc in documents] if documents else [],
@@ -772,6 +776,7 @@ class TraceTask:
)
tool_trace_info = ToolTraceInfo(
trace_id=self.trace_id,
message_id=message_id,
message_data=message_data.to_dict(),
tool_name=tool_name,
@@ -807,6 +812,7 @@ class TraceTask:
}
generate_name_trace_info = GenerateNameTraceInfo(
trace_id=self.trace_id,
conversation_id=conversation_id,
inputs=inputs,
outputs=generate_conversation_name,

View File

@@ -87,8 +87,7 @@ class WeaveDataTrace(BaseTraceInstance):
self.generate_name_trace(trace_info)
def workflow_trace(self, trace_info: WorkflowTraceInfo):
external_trace_id = trace_info.metadata.get("external_trace_id")
trace_id = external_trace_id or trace_info.message_id or trace_info.workflow_run_id
trace_id = trace_info.trace_id or trace_info.message_id or trace_info.workflow_run_id
if trace_info.start_time is None:
trace_info.start_time = datetime.now()
@@ -245,8 +244,12 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time
attributes["tags"] = ["message", str(trace_info.conversation_mode)]
trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id
message_run = WeaveTraceModel(
id=message_id,
id=trace_id,
op=str(TraceTaskName.MESSAGE_TRACE.value),
input_tokens=trace_info.message_tokens,
output_tokens=trace_info.answer_tokens,
@@ -274,7 +277,7 @@ class WeaveDataTrace(BaseTraceInstance):
)
self.start_call(
llm_run,
parent_run_id=message_id,
parent_run_id=trace_id,
)
self.finish_call(llm_run)
self.finish_call(message_run)
@@ -289,6 +292,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time or trace_info.message_data.created_at
attributes["end_time"] = trace_info.end_time or trace_info.message_data.updated_at
trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id
moderation_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.MODERATION_TRACE.value),
@@ -303,7 +309,7 @@ class WeaveDataTrace(BaseTraceInstance):
exception=getattr(trace_info, "error", None),
file_list=[],
)
self.start_call(moderation_run, parent_run_id=trace_info.message_id)
self.start_call(moderation_run, parent_run_id=trace_id)
self.finish_call(moderation_run)
def suggested_question_trace(self, trace_info: SuggestedQuestionTraceInfo):
@@ -316,6 +322,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or message_data.updated_at,)
trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id
suggested_question_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.SUGGESTED_QUESTION_TRACE.value),
@@ -326,7 +335,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)
self.start_call(suggested_question_run, parent_run_id=trace_info.message_id)
self.start_call(suggested_question_run, parent_run_id=trace_id)
self.finish_call(suggested_question_run)
def dataset_retrieval_trace(self, trace_info: DatasetRetrievalTraceInfo):
@@ -338,6 +347,9 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = (trace_info.start_time or trace_info.message_data.created_at,)
attributes["end_time"] = (trace_info.end_time or trace_info.message_data.updated_at,)
trace_id = trace_info.trace_id or trace_info.message_id
attributes["trace_id"] = trace_id
dataset_retrieval_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=str(TraceTaskName.DATASET_RETRIEVAL_TRACE.value),
@@ -348,7 +360,7 @@ class WeaveDataTrace(BaseTraceInstance):
file_list=[],
)
self.start_call(dataset_retrieval_run, parent_run_id=trace_info.message_id)
self.start_call(dataset_retrieval_run, parent_run_id=trace_id)
self.finish_call(dataset_retrieval_run)
def tool_trace(self, trace_info: ToolTraceInfo):
@@ -357,6 +369,11 @@ class WeaveDataTrace(BaseTraceInstance):
attributes["start_time"] = trace_info.start_time
attributes["end_time"] = trace_info.end_time
message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
trace_id = trace_info.trace_id or message_id
attributes["trace_id"] = trace_id
tool_run = WeaveTraceModel(
id=str(uuid.uuid4()),
op=trace_info.tool_name,
@@ -366,9 +383,7 @@ class WeaveDataTrace(BaseTraceInstance):
attributes=attributes,
exception=trace_info.error,
)
message_id = trace_info.message_id or getattr(trace_info, "conversation_id", None)
message_id = message_id or None
self.start_call(tool_run, parent_run_id=message_id)
self.start_call(tool_run, parent_run_id=trace_id)
self.finish_call(tool_run)
def generate_name_trace(self, trace_info: GenerateNameTraceInfo):

View File

@@ -208,6 +208,7 @@ class BasePluginClient:
except Exception:
raise PluginDaemonInnerError(code=rep.code, message=rep.message)
logger.error("Error in stream reponse for plugin %s", rep.__dict__)
self._handle_plugin_daemon_error(error.error_type, error.message)
raise ValueError(f"plugin daemon: {rep.message}, code: {rep.code}")
if rep.data is None:

View File

@@ -2,6 +2,8 @@ from collections.abc import Mapping
from pydantic import TypeAdapter
from extensions.ext_logging import get_request_id
class PluginDaemonError(Exception):
"""Base class for all plugin daemon errors."""
@@ -11,7 +13,7 @@ class PluginDaemonError(Exception):
def __str__(self) -> str:
# returns the class name and description
return f"{self.__class__.__name__}: {self.description}"
return f"req_id: {get_request_id()} {self.__class__.__name__}: {self.description}"
class PluginDaemonInternalError(PluginDaemonError):

View File

@@ -0,0 +1,190 @@
# Clickzetta Vector Database Integration
This module provides integration with Clickzetta Lakehouse as a vector database for Dify.
## Features
- **Vector Storage**: Store and retrieve high-dimensional vectors using Clickzetta's native VECTOR type
- **Vector Search**: Efficient similarity search using HNSW algorithm
- **Full-Text Search**: Leverage Clickzetta's inverted index for powerful text search capabilities
- **Hybrid Search**: Combine vector similarity and full-text search for better results
- **Multi-language Support**: Built-in support for Chinese, English, and Unicode text processing
- **Scalable**: Leverage Clickzetta's distributed architecture for large-scale deployments
## Configuration
### Required Environment Variables
All seven configuration parameters are required:
```bash
# Authentication
CLICKZETTA_USERNAME=your_username
CLICKZETTA_PASSWORD=your_password
# Instance configuration
CLICKZETTA_INSTANCE=your_instance_id
CLICKZETTA_SERVICE=api.clickzetta.com
CLICKZETTA_WORKSPACE=your_workspace
CLICKZETTA_VCLUSTER=your_vcluster
CLICKZETTA_SCHEMA=your_schema
```
### Optional Configuration
```bash
# Batch processing
CLICKZETTA_BATCH_SIZE=100
# Full-text search configuration
CLICKZETTA_ENABLE_INVERTED_INDEX=true
CLICKZETTA_ANALYZER_TYPE=chinese # Options: keyword, english, chinese, unicode
CLICKZETTA_ANALYZER_MODE=smart # Options: max_word, smart
# Vector search configuration
CLICKZETTA_VECTOR_DISTANCE_FUNCTION=cosine_distance # Options: l2_distance, cosine_distance
```
## Usage
### 1. Set Clickzetta as the Vector Store
In your Dify configuration, set:
```bash
VECTOR_STORE=clickzetta
```
### 2. Table Structure
Clickzetta will automatically create tables with the following structure:
```sql
CREATE TABLE <collection_name> (
id STRING NOT NULL,
content STRING NOT NULL,
metadata JSON,
vector VECTOR(FLOAT, <dimension>) NOT NULL,
PRIMARY KEY (id)
);
-- Vector index for similarity search
CREATE VECTOR INDEX idx_<collection_name>_vec
ON TABLE <schema>.<collection_name>(vector)
PROPERTIES (
"distance.function" = "cosine_distance",
"scalar.type" = "f32"
);
-- Inverted index for full-text search (if enabled)
CREATE INVERTED INDEX idx_<collection_name>_text
ON <schema>.<collection_name>(content)
PROPERTIES (
"analyzer" = "chinese",
"mode" = "smart"
);
```
## Full-Text Search Capabilities
Clickzetta supports advanced full-text search with multiple analyzers:
### Analyzer Types
1. **keyword**: No tokenization, treats the entire string as a single token
- Best for: Exact matching, IDs, codes
2. **english**: Designed for English text
- Features: Recognizes ASCII letters and numbers, converts to lowercase
- Best for: English content
3. **chinese**: Chinese text tokenizer
- Features: Recognizes Chinese and English characters, removes punctuation
- Best for: Chinese or mixed Chinese-English content
4. **unicode**: Multi-language tokenizer based on Unicode
- Features: Recognizes text boundaries in multiple languages
- Best for: Multi-language content
### Analyzer Modes
- **max_word**: Fine-grained tokenization (more tokens)
- **smart**: Intelligent tokenization (balanced)
### Full-Text Search Functions
- `MATCH_ALL(column, query)`: All terms must be present
- `MATCH_ANY(column, query)`: At least one term must be present
- `MATCH_PHRASE(column, query)`: Exact phrase matching
- `MATCH_PHRASE_PREFIX(column, query)`: Phrase prefix matching
- `MATCH_REGEXP(column, pattern)`: Regular expression matching
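As a rough illustration, these functions are called like any other SQL predicate. A hypothetical Python sketch over a DB-API-style connection (assuming a qmark paramstyle and context-manager cursors; `conn` and the table name are placeholders, not part of Clickzetta's documented client API):

```python
# Hypothetical sketch, not Clickzetta's documented client API:
# issue a MATCH_ANY full-text query through a DB-API-style cursor.
def search_any(conn, table: str, query: str, limit: int = 10):
    sql = (
        f"SELECT id, content FROM {table} "  # table name is a trusted identifier here
        "WHERE MATCH_ANY(content, ?) LIMIT ?"
    )
    with conn.cursor() as cur:  # assumes context-manager cursors and qmark paramstyle
        cur.execute(sql, (query, limit))
        return cur.fetchall()
```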
## Performance Optimization
### Vector Search
1. **Adjust the exploration factor** to trade accuracy against speed:
```sql
SET cz.vector.index.search.ef=64;
```
2. **Use appropriate distance functions**:
- `cosine_distance`: Best for normalized embeddings (e.g., from language models)
- `l2_distance`: Best for raw feature vectors
### Full-Text Search
1. **Choose the right analyzer**:
- Use `keyword` for exact matching
- Use language-specific analyzers for better tokenization
2. **Combine with vector search**:
- Pre-filter with full-text search for better performance
- Use hybrid search for improved relevance
## Troubleshooting
### Connection Issues
1. Verify that all seven required configuration parameters are set
2. Check network connectivity to Clickzetta service
3. Ensure the user has proper permissions on the schema
### Search Performance
1. Verify vector index exists:
```sql
SHOW INDEX FROM <schema>.<table_name>;
```
2. Check if vector index is being used:
```sql
EXPLAIN SELECT ... WHERE l2_distance(...) < threshold;
```
Look for `vector_index_search_type` in the execution plan.
### Full-Text Search Not Working
1. Verify that the inverted index was created
2. Check that the analyzer configuration matches your content's language
3. Use `TOKENIZE()` function to test tokenization:
```sql
SELECT TOKENIZE('your text', map('analyzer', 'chinese', 'mode', 'smart'));
```
## Limitations
1. Vector operations don't support `ORDER BY` or `GROUP BY` directly on vector columns
2. Full-text search relevance scores are not provided by Clickzetta
3. Inverted index creation may fail on very large existing tables (the integration continues without raising an error)
4. Index naming constraints:
- Index names must be unique within a schema
- Only one vector index can be created per column
- The implementation uses timestamps to ensure unique index names
## References
- [Clickzetta Vector Search Documentation](https://yunqi.tech/documents/vector-search)
- [Clickzetta Inverted Index Documentation](https://yunqi.tech/documents/inverted-index)
- [Clickzetta SQL Functions](https://yunqi.tech/documents/sql-reference)

View File

@@ -0,0 +1 @@
# Clickzetta Vector Database Integration for Dify

File diff suppressed because it is too large

View File

@@ -7,6 +7,7 @@ from urllib.parse import urlparse
import requests
from elasticsearch import Elasticsearch
from flask import current_app
from packaging.version import parse as parse_version
from pydantic import BaseModel, model_validator
from core.rag.datasource.vdb.field import Field
@@ -22,22 +23,50 @@ logger = logging.getLogger(__name__)
class ElasticSearchConfig(BaseModel):
host: str
port: int
username: str
password: str
# Regular Elasticsearch config
host: Optional[str] = None
port: Optional[int] = None
username: Optional[str] = None
password: Optional[str] = None
# Elastic Cloud specific config
cloud_url: Optional[str] = None # Cloud URL for Elasticsearch Cloud
api_key: Optional[str] = None
# Common config
use_cloud: bool = False
ca_certs: Optional[str] = None
verify_certs: bool = False
request_timeout: int = 100000
retry_on_timeout: bool = True
max_retries: int = 10000
@model_validator(mode="before")
@classmethod
def validate_config(cls, values: dict) -> dict:
if not values["host"]:
raise ValueError("config HOST is required")
if not values["port"]:
raise ValueError("config PORT is required")
if not values["username"]:
raise ValueError("config USERNAME is required")
if not values["password"]:
raise ValueError("config PASSWORD is required")
use_cloud = values.get("use_cloud", False)
cloud_url = values.get("cloud_url")
if use_cloud:
# Cloud configuration validation - requires cloud_url and api_key
if not cloud_url:
raise ValueError("cloud_url is required for Elastic Cloud")
api_key = values.get("api_key")
if not api_key:
raise ValueError("api_key is required for Elastic Cloud")
else:
# Regular Elasticsearch validation
if not values.get("host"):
raise ValueError("config HOST is required for regular Elasticsearch")
if not values.get("port"):
raise ValueError("config PORT is required for regular Elasticsearch")
if not values.get("username"):
raise ValueError("config USERNAME is required for regular Elasticsearch")
if not values.get("password"):
raise ValueError("config PASSWORD is required for regular Elasticsearch")
return values
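For reference, a hedged sketch of the two configuration shapes this validator accepts, with placeholder values:

```python
# Placeholder values; both dicts satisfy validate_config above.
cloud_config = {
    "use_cloud": True,
    "cloud_url": "https://my-deployment.es.us-east-1.aws.found.io:443",
    "api_key": "base64-encoded-api-key",
}
self_hosted_config = {
    "use_cloud": False,
    "host": "http://elasticsearch",
    "port": 9200,
    "username": "elastic",
    "password": "elastic",
}
```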
@@ -50,21 +79,69 @@ class ElasticSearchVector(BaseVector):
self._attributes = attributes
def _init_client(self, config: ElasticSearchConfig) -> Elasticsearch:
"""
Initialize Elasticsearch client for both regular Elasticsearch and Elastic Cloud.
"""
try:
parsed_url = urlparse(config.host)
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
# Check if using Elastic Cloud
client_config: dict[str, Any]
if config.use_cloud and config.cloud_url:
client_config = {
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
"verify_certs": config.verify_certs,
}
# Parse cloud URL and configure hosts
parsed_url = urlparse(config.cloud_url)
host = f"{parsed_url.scheme}://{parsed_url.hostname}"
if parsed_url.port:
host += f":{parsed_url.port}"
client_config["hosts"] = [host]
# API key authentication for cloud
client_config["api_key"] = config.api_key
# SSL settings
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs
else:
hosts = f"http://{config.host}:{config.port}"
client = Elasticsearch(
hosts=hosts,
basic_auth=(config.username, config.password),
request_timeout=100000,
retry_on_timeout=True,
max_retries=10000,
)
except requests.exceptions.ConnectionError:
raise ConnectionError("Vector database connection error")
# Regular Elasticsearch configuration
parsed_url = urlparse(config.host or "")
if parsed_url.scheme in {"http", "https"}:
hosts = f"{config.host}:{config.port}"
use_https = parsed_url.scheme == "https"
else:
hosts = f"http://{config.host}:{config.port}"
use_https = False
client_config = {
"hosts": [hosts],
"basic_auth": (config.username, config.password),
"request_timeout": config.request_timeout,
"retry_on_timeout": config.retry_on_timeout,
"max_retries": config.max_retries,
}
# Only add SSL settings if using HTTPS
if use_https:
client_config["verify_certs"] = config.verify_certs
if config.ca_certs:
client_config["ca_certs"] = config.ca_certs
client = Elasticsearch(**client_config)
# Test connection
if not client.ping():
raise ConnectionError("Failed to connect to Elasticsearch")
except requests.exceptions.ConnectionError as e:
raise ConnectionError(f"Vector database connection error: {str(e)}")
except Exception as e:
raise ConnectionError(f"Elasticsearch client initialization failed: {str(e)}")
return client
@@ -73,7 +150,7 @@ class ElasticSearchVector(BaseVector):
return cast(str, info["version"]["number"])
def _check_version(self):
if self._version < "8.0.0":
if parse_version(self._version) < parse_version("8.0.0"):
raise ValueError("Elasticsearch vector database version must be greater than 8.0.0")
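The switch to `packaging.version.parse` matters because plain string comparison orders version numbers lexicographically, so a multi-digit major version sorts before a single-digit one. A two-line demonstration:

```python
from packaging.version import parse as parse_version

assert "10.2.0" < "8.0.0"  # True as strings -- the bug
assert parse_version("10.2.0") > parse_version("8.0.0")  # semantically correct
```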
def get_type(self) -> str:
@@ -209,7 +286,11 @@ class ElasticSearchVector(BaseVector):
},
}
}
self._client.indices.create(index=self._collection_name, mappings=mappings)
logger.info("Created index %s with dimension %s", self._collection_name, dim)
else:
logger.info("Collection %s already exists.", self._collection_name)
redis_client.set(collection_exist_cache_key, 1, ex=3600)
@@ -225,13 +306,51 @@ class ElasticSearchVectorFactory(AbstractVectorFactory):
dataset.index_struct = json.dumps(self.gen_index_struct_dict(VectorType.ELASTICSEARCH, collection_name))
config = current_app.config
# Check if ELASTICSEARCH_USE_CLOUD is explicitly set to false (boolean)
use_cloud_env = config.get("ELASTICSEARCH_USE_CLOUD", False)
if use_cloud_env is False:
# Use regular Elasticsearch with config values
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "elasticsearch"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", "elastic"),
}
else:
# Check for cloud configuration
cloud_url = config.get("ELASTICSEARCH_CLOUD_URL")
if cloud_url:
config_dict = {
"use_cloud": True,
"cloud_url": cloud_url,
"api_key": config.get("ELASTICSEARCH_API_KEY"),
}
else:
# Fallback to regular Elasticsearch
config_dict = {
"use_cloud": False,
"host": config.get("ELASTICSEARCH_HOST", "localhost"),
"port": config.get("ELASTICSEARCH_PORT", 9200),
"username": config.get("ELASTICSEARCH_USERNAME", "elastic"),
"password": config.get("ELASTICSEARCH_PASSWORD", ""),
}
# Common configuration
config_dict.update(
{
"ca_certs": str(config.get("ELASTICSEARCH_CA_CERTS")) if config.get("ELASTICSEARCH_CA_CERTS") else None,
"verify_certs": bool(config.get("ELASTICSEARCH_VERIFY_CERTS", False)),
"request_timeout": int(config.get("ELASTICSEARCH_REQUEST_TIMEOUT", 100000)),
"retry_on_timeout": bool(config.get("ELASTICSEARCH_RETRY_ON_TIMEOUT", True)),
"max_retries": int(config.get("ELASTICSEARCH_MAX_RETRIES", 10000)),
}
)
return ElasticSearchVector(
index_name=collection_name,
config=ElasticSearchConfig(
host=config.get("ELASTICSEARCH_HOST", "localhost"),
port=config.get("ELASTICSEARCH_PORT", 9200),
username=config.get("ELASTICSEARCH_USERNAME", ""),
password=config.get("ELASTICSEARCH_PASSWORD", ""),
),
config=ElasticSearchConfig(**config_dict),
attributes=[],
)

View File

@@ -1,5 +1,6 @@
import json
import logging
import math
from typing import Any, Optional
import tablestore # type: ignore
@@ -22,6 +23,7 @@ class TableStoreConfig(BaseModel):
access_key_secret: Optional[str] = None
instance_name: Optional[str] = None
endpoint: Optional[str] = None
normalize_full_text_bm25_score: Optional[bool] = False
@model_validator(mode="before")
@classmethod
@@ -47,6 +49,7 @@ class TableStoreVector(BaseVector):
config.access_key_secret,
config.instance_name,
)
self._normalize_full_text_bm25_score = config.normalize_full_text_bm25_score
self._table_name = f"{collection_name}"
self._index_name = f"{collection_name}_idx"
self._tags_field = f"{Field.METADATA_KEY.value}_tags"
@@ -131,8 +134,8 @@ class TableStoreVector(BaseVector):
filtered_list = None
if document_ids_filter:
filtered_list = ["document_id=" + item for item in document_ids_filter]
return self._search_by_full_text(query, filtered_list, top_k)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._search_by_full_text(query, filtered_list, top_k, score_threshold)
def delete(self) -> None:
self._delete_table_if_exist()
@@ -318,7 +321,19 @@ class TableStoreVector(BaseVector):
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents
def _search_by_full_text(self, query: str, document_ids_filter: list[str] | None, top_k: int) -> list[Document]:
@staticmethod
def _normalize_score_exp_decay(score: float, k: float = 0.15) -> float:
"""
Args:
score: BM25 search score.
k: decay factor; the larger k is, the steeper the curve at the low-score end
"""
normalized_score = 1 - math.exp(-k * score)
return max(0.0, min(1.0, normalized_score))
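With the default k = 0.15 the mapping is monotonic and bounded in [0, 1): a raw BM25 score of 5 lands near 0.53, 10 near 0.78, and 20 near 0.95. A quick check of those values:

```python
import math

def normalize(score: float, k: float = 0.15) -> float:
    return max(0.0, min(1.0, 1 - math.exp(-k * score)))

# Monotonic, bounded in [0, 1): higher BM25 scores saturate toward 1.
assert round(normalize(5), 2) == 0.53
assert round(normalize(10), 2) == 0.78
assert round(normalize(20), 2) == 0.95
```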
def _search_by_full_text(
self, query: str, document_ids_filter: list[str] | None, top_k: int, score_threshold: float
) -> list[Document]:
bool_query = tablestore.BoolQuery(must_queries=[], filter_queries=[], should_queries=[], must_not_queries=[])
bool_query.must_queries.append(tablestore.MatchQuery(text=query, field_name=Field.CONTENT_KEY.value))
@@ -339,15 +354,27 @@ class TableStoreVector(BaseVector):
documents = []
for search_hit in search_response.search_hits:
score = None
if self._normalize_full_text_bm25_score:
score = self._normalize_score_exp_decay(search_hit.score)
# Skip results whose normalized score falls at or below the threshold
if score and score <= score_threshold:
continue
ots_column_map = {}
for col in search_hit.row[1]:
ots_column_map[col[0]] = col[1]
vector_str = ots_column_map.get(Field.VECTOR.value)
metadata_str = ots_column_map.get(Field.METADATA_KEY.value)
vector = json.loads(vector_str) if vector_str else None
metadata = json.loads(metadata_str) if metadata_str else {}
vector_str = ots_column_map.get(Field.VECTOR.value)
vector = json.loads(vector_str) if vector_str else None
if score:
metadata["score"] = score
documents.append(
Document(
page_content=ots_column_map.get(Field.CONTENT_KEY.value) or "",
@@ -355,6 +382,8 @@ class TableStoreVector(BaseVector):
metadata=metadata,
)
)
if self._normalize_full_text_bm25_score:
documents = sorted(documents, key=lambda x: x.metadata["score"] if x.metadata else 0, reverse=True)
return documents
@@ -375,5 +404,6 @@ class TableStoreVectorFactory(AbstractVectorFactory):
instance_name=dify_config.TABLESTORE_INSTANCE_NAME,
access_key_id=dify_config.TABLESTORE_ACCESS_KEY_ID,
access_key_secret=dify_config.TABLESTORE_ACCESS_KEY_SECRET,
normalize_full_text_bm25_score=dify_config.TABLESTORE_NORMALIZE_FULLTEXT_BM25_SCORE,
),
)

View File

@@ -246,6 +246,10 @@ class TencentVector(BaseVector):
return self._get_search_res(res, score_threshold)
def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]:
document_ids_filter = kwargs.get("document_ids_filter")
filter = None
if document_ids_filter:
filter = Filter(Filter.In("metadata.document_id", document_ids_filter))
if not self._enable_hybrid_search:
return []
res = self._client.hybrid_search(
@@ -269,6 +273,7 @@ class TencentVector(BaseVector):
),
retrieve_vector=False,
limit=kwargs.get("top_k", 4),
filter=filter,
)
score_threshold = float(kwargs.get("score_threshold") or 0.0)
return self._get_search_res(res, score_threshold)

View File

@@ -172,6 +172,10 @@ class Vector:
from core.rag.datasource.vdb.matrixone.matrixone_vector import MatrixoneVectorFactory
return MatrixoneVectorFactory
case VectorType.CLICKZETTA:
from core.rag.datasource.vdb.clickzetta.clickzetta_vector import ClickzettaVectorFactory
return ClickzettaVectorFactory
case _:
raise ValueError(f"Vector store {vector_type} is not supported.")

View File

@@ -30,3 +30,4 @@ class VectorType(StrEnum):
TABLESTORE = "tablestore"
HUAWEI_CLOUD = "huawei_cloud"
MATRIXONE = "matrixone"
CLICKZETTA = "clickzetta"

View File

@@ -13,6 +13,8 @@ SupportedComparisonOperator = Literal[
"is not",
"empty",
"not empty",
"in",
"not in",
# for number
"=",
"",

View File

@@ -1,5 +1,6 @@
import json
import logging
import operator
from typing import Any, Optional, cast
import requests
@@ -130,13 +131,15 @@ class NotionExtractor(BaseExtractor):
data[property_name] = value
row_dict = {k: v for k, v in data.items() if v}
row_content = ""
for key, value in row_dict.items():
for key, value in sorted(row_dict.items(), key=operator.itemgetter(0)):
if isinstance(value, dict):
value_dict = {k: v for k, v in value.items() if v}
value_content = "".join(f"{k}:{v} " for k, v in value_dict.items())
row_content = row_content + f"{key}:{value_content}\n"
else:
row_content = row_content + f"{key}:{value}\n"
if "url" in result:
row_content = row_content + f"Row Page URL:{result.get('url', '')}\n"
database_content.append(row_content)
has_more = response_data.get("has_more", False)

View File

@@ -62,7 +62,7 @@ class WordExtractor(BaseExtractor):
def extract(self) -> list[Document]:
"""Load given path as single page."""
content = self.parse_docx(self.file_path, "storage")
content = self.parse_docx(self.file_path)
return [
Document(
page_content=content,
@@ -189,23 +189,8 @@ class WordExtractor(BaseExtractor):
paragraph_content.append(run.text)
return "".join(paragraph_content).strip()
def _parse_paragraph(self, paragraph, image_map):
paragraph_content = []
for run in paragraph.runs:
if run.element.xpath(".//a:blip"):
for blip in run.element.xpath(".//a:blip"):
embed_id = blip.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed")
if embed_id:
rel_target = run.part.rels[embed_id].target_ref
if rel_target in image_map:
paragraph_content.append(image_map[rel_target])
if run.text.strip():
paragraph_content.append(run.text.strip())
return " ".join(paragraph_content) if paragraph_content else ""
def parse_docx(self, docx_path, image_folder):
def parse_docx(self, docx_path):
doc = DocxDocument(docx_path)
os.makedirs(image_folder, exist_ok=True)
content = []

View File

@@ -5,14 +5,13 @@ from __future__ import annotations
from typing import Any, Optional
from core.model_manager import ModelInstance
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenzier import GPT2Tokenizer
from core.model_runtime.model_providers.__base.tokenizers.gpt2_tokenizer import GPT2Tokenizer
from core.rag.splitter.text_splitter import (
TS,
Collection,
Literal,
RecursiveCharacterTextSplitter,
Set,
TokenTextSplitter,
Union,
)
@@ -45,14 +44,6 @@ class EnhanceRecursiveCharacterTextSplitter(RecursiveCharacterTextSplitter):
return [len(text) for text in texts]
if issubclass(cls, TokenTextSplitter):
extra_kwargs = {
"model_name": embedding_model_instance.model if embedding_model_instance else "gpt2",
"allowed_special": allowed_special,
"disallowed_special": disallowed_special,
}
kwargs = {**kwargs, **extra_kwargs}
return cls(length_function=_character_encoder, **kwargs)

View File

@@ -20,9 +20,6 @@ class Tool(ABC):
The base class of a tool
"""
entity: ToolEntity
runtime: ToolRuntime
def __init__(self, entity: ToolEntity, runtime: ToolRuntime) -> None:
self.entity = entity
self.runtime = runtime

View File

@@ -37,12 +37,12 @@ class LocaltimeToTimestampTool(BuiltinTool):
@staticmethod
def localtime_to_timestamp(localtime: str, time_format: str, local_tz=None) -> int | None:
try:
if local_tz is None:
local_tz = datetime.now().astimezone().tzinfo
if isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
local_time = datetime.strptime(localtime, time_format)
localtime = local_tz.localize(local_time) # type: ignore
if local_tz is None:
localtime = local_time.astimezone() # type: ignore
elif isinstance(local_tz, str):
local_tz = pytz.timezone(local_tz)
localtime = local_tz.localize(local_time) # type: ignore
timestamp = int(localtime.timestamp()) # type: ignore
return timestamp
except Exception as e:

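After the reorder, a missing `local_tz` is resolved only after parsing, by attaching the system zone via `astimezone()`. A hedged sketch of the same three-way branch (the function name is illustrative):

```python
from datetime import datetime

import pytz

def to_timestamp(localtime: str, fmt: str, tz=None) -> int:
    naive = datetime.strptime(localtime, fmt)
    if tz is None:
        aware = naive.astimezone()  # attach the system's local zone
    elif isinstance(tz, str):
        aware = pytz.timezone(tz).localize(naive)
    else:
        aware = tz.localize(naive)
    return int(aware.timestamp())

# Same wall-clock string, different zones, different epochs:
print(to_timestamp("2025-08-11 12:00:00", "%Y-%m-%d %H:%M:%S", "UTC"))
print(to_timestamp("2025-08-11 12:00:00", "%Y-%m-%d %H:%M:%S", "Asia/Shanghai"))
```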
View File

@@ -27,7 +27,7 @@ class TimezoneConversionTool(BuiltinTool):
target_time = self.timezone_convert(current_time, current_timezone, target_timezone) # type: ignore
if not target_time:
yield self.create_text_message(
f"Invalid datatime and timezone: {current_time},{current_timezone},{target_timezone}"
f"Invalid datetime and timezone: {current_time},{current_timezone},{target_timezone}"
)
return

View File

@@ -20,8 +20,6 @@ class BuiltinTool(Tool):
:param meta: the meta data of a tool call processing
"""
provider: str
def __init__(self, provider: str, **kwargs):
super().__init__(**kwargs)
self.provider = provider

Some files were not shown because too many files have changed in this diff