// Patch overview: adds contracts built with '@orpc/contract' for the evaluation endpoints
// (web/contract/console/evaluation.ts), registers them on consoleRouterContract
// (web/contract/router.ts), adds '@tanstack/react-query' hooks on top of consoleQuery
// (web/service/use-evaluation.ts) and declares the shared payload types (web/types/evaluation.ts).
// Review comments are trailing `//` comments on added (`+`) lines only, so hunk line counts
// and context lines are unchanged.
// NOTE(review): several generic type arguments (`type()`, `Exclude`, `Record`, `ReturnType`) appear
// without their `<...>` arguments in this patch text; they look lost in extraction — confirm against the real files.
diff --git a/web/contract/console/evaluation.ts b/web/contract/console/evaluation.ts
new file mode 100644
index 0000000000..ded068b80c
--- /dev/null
+++ b/web/contract/console/evaluation.ts
@@ -0,0 +1,305 @@
+import type {
+  EvaluationConfig,
+  EvaluationConfigData,
+  EvaluationFileInfo,
+  EvaluationLogsResponse,
+  EvaluationMetricsListResponse,
+  EvaluationMetricsMapResponse,
+  EvaluationNodeInfoRequest,
+  EvaluationNodeInfoResponse,
+  EvaluationRun,
+  EvaluationRunDetailResponse,
+  EvaluationRunRequest,
+  EvaluationTargetType,
+  EvaluationVersionDetailResponse,
+} from '@/types/evaluation'
+import { type } from '@orpc/contract'
+import { base } from '../base'
+
+export const datasetEvaluationTemplateDownloadContract = base // Dataset-scoped: template download route.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/template/download',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+  }>())
+  .output(type()) // NOTE(review): no type argument on `type()` here or on any `.output(...)` below — presumably one of the imported response types; confirm.
+
+export const datasetEvaluationConfigContract = base // Dataset-scoped: read the evaluation config.
+  .route({
+    path: '/datasets/{datasetId}/evaluation',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+  }>())
+  .output(type())
+
+export const saveDatasetEvaluationConfigContract = base // Dataset-scoped: save the evaluation config (same path as the GET, method PUT).
+  .route({
+    path: '/datasets/{datasetId}/evaluation',
+    method: 'PUT',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+    body: EvaluationConfigData
+  }>())
+  .output(type())
+
+export const startDatasetEvaluationRunContract = base // Dataset-scoped: start a run; body is EvaluationRunRequest.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/run',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+    body: EvaluationRunRequest
+  }>())
+  .output(type())
+
+export const datasetEvaluationLogsContract = base // Dataset-scoped: logs route with optional page / page_size query.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/logs',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+    query: { // The `query` key itself is required; only its fields are optional.
+      page?: number
+      page_size?: number
+    }
+  }>())
+  .output(type())
+
+export const datasetEvaluationRunDetailContract = base // Dataset-scoped: detail of one run, with optional page / page_size query.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/runs/{runId}',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+      runId: string
+    }
+    query: {
+      page?: number
+      page_size?: number
+    }
+  }>())
+  .output(type())
+
+export const cancelDatasetEvaluationRunContract = base // Dataset-scoped: cancel route for one run.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/runs/{runId}/cancel',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+      runId: string
+    }
+  }>())
+  .output(type())
+
+export const datasetEvaluationMetricsContract = base // Dataset-scoped: metrics route.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/metrics',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+    }
+  }>())
+  .output(type())
+
+export const datasetEvaluationFileContract = base // Dataset-scoped: file route addressed by fileId.
+  .route({
+    path: '/datasets/{datasetId}/evaluation/files/{fileId}',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      datasetId: string
+      fileId: string
+    }
+  }>())
+  .output(type())
+
+export const evaluationTemplateDownloadContract = base // Target-scoped: template download route; note the path segment is `dataset-template`, unlike the dataset variant.
+  .route({
+    path: '/{targetType}/{targetId}/dataset-template/download',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+  }>())
+  .output(type())
+
+export const evaluationConfigContract = base // Target-scoped: read the evaluation config.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+  }>())
+  .output(type())
+
+export const saveEvaluationConfigContract = base // Target-scoped: save the evaluation config (same path as the GET, method PUT).
+  .route({
+    path: '/{targetType}/{targetId}/evaluation',
+    method: 'PUT',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+    body: EvaluationConfigData
+  }>())
+  .output(type())
+
+export const evaluationLogsContract = base // Target-scoped: logs route with optional page / page_size query.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/logs',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+    query: {
+      page?: number
+      page_size?: number
+    }
+  }>())
+  .output(type())
+
+export const startEvaluationRunContract = base // Target-scoped: start a run; body is EvaluationRunRequest.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/run',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+    body: EvaluationRunRequest
+  }>())
+  .output(type())
+
+export const evaluationRunDetailContract = base // Target-scoped: detail of one run, with optional page / page_size query.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/runs/{runId}',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+      runId: string
+    }
+    query: {
+      page?: number
+      page_size?: number
+    }
+  }>())
+  .output(type())
+
+export const cancelEvaluationRunContract = base // Target-scoped: cancel route for one run.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/runs/{runId}/cancel',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+      runId: string
+    }
+  }>())
+  .output(type())
+
+export const evaluationMetricsContract = base // Target-scoped: metrics route.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/metrics',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+  }>())
+  .output(type())
+
+export const evaluationNodeInfoContract = base // Target-scoped: node-info route; POST with an EvaluationNodeInfoRequest body.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/node-info',
+    method: 'POST',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+    body: EvaluationNodeInfoRequest
+  }>())
+  .output(type())
+
+export const availableEvaluationMetricsContract = base // Not scoped to a target or dataset: declares no `.input(...)`.
+  .route({
+    path: '/evaluation/available-metrics',
+    method: 'GET',
+  })
+  .output(type())
+
+export const evaluationFileContract = base // Target-scoped: file route addressed by fileId.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/files/{fileId}',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+      fileId: string
+    }
+  }>())
+  .output(type())
+
+export const evaluationVersionDetailContract = base // Target-scoped: version route; `version` is a required query field.
+  .route({
+    path: '/{targetType}/{targetId}/evaluation/version',
+    method: 'GET',
+  })
+  .input(type<{
+    params: {
+      targetType: EvaluationTargetType
+      targetId: string
+    }
+    query: {
+      version: string
+    }
+  }>())
+  .output(type())
diff --git a/web/contract/router.ts b/web/contract/router.ts
index 57053dfc11..de5869717a 100644
--- a/web/contract/router.ts
+++ b/web/contract/router.ts
@@ -1,6 +1,29 @@
 import type { InferContractRouterInputs } from '@orpc/contract'
 import { appDeleteContract } from './console/apps'
 import { bindPartnerStackContract, invoicesContract } from './console/billing'
+import { // All 21 contracts exported by ./console/evaluation are imported here.
+  availableEvaluationMetricsContract,
+  cancelDatasetEvaluationRunContract,
+  cancelEvaluationRunContract,
+  datasetEvaluationConfigContract,
+  datasetEvaluationFileContract,
+  datasetEvaluationLogsContract,
+  datasetEvaluationMetricsContract,
+  datasetEvaluationRunDetailContract,
+  datasetEvaluationTemplateDownloadContract,
+  evaluationConfigContract,
+  evaluationFileContract,
+  evaluationLogsContract,
+  evaluationMetricsContract,
+  evaluationNodeInfoContract,
+  evaluationRunDetailContract,
+  evaluationTemplateDownloadContract,
+  evaluationVersionDetailContract,
+  saveDatasetEvaluationConfigContract,
+  saveEvaluationConfigContract,
+  startDatasetEvaluationRunContract,
+  startEvaluationRunContract,
+} from './console/evaluation'
 import {
   exploreAppDetailContract,
   exploreAppsContract,
@@ -97,6 +120,31 @@ export const consoleRouterContract = {
     models: modelProvidersModelsContract,
     changePreferredProviderType: changePreferredProviderTypeContract,
   },
+  evaluation: { // Contracts routed by {targetType}/{targetId}, plus the unscoped availableMetrics route.
+    templateDownload: evaluationTemplateDownloadContract,
+    config: evaluationConfigContract,
+    saveConfig: saveEvaluationConfigContract,
+    logs: evaluationLogsContract,
+    startRun: startEvaluationRunContract,
+    runDetail: evaluationRunDetailContract,
+    cancelRun: cancelEvaluationRunContract,
+    metrics: evaluationMetricsContract,
+    nodeInfo: evaluationNodeInfoContract,
+    availableMetrics: availableEvaluationMetricsContract,
+    file: evaluationFileContract,
+    versionDetail: evaluationVersionDetailContract,
+  },
+  datasetEvaluation: { // Contracts routed by datasets/{datasetId}; no nodeInfo, availableMetrics or versionDetail entries here.
+    templateDownload: datasetEvaluationTemplateDownloadContract,
+    config: datasetEvaluationConfigContract,
+    saveConfig: saveDatasetEvaluationConfigContract,
+    startRun: startDatasetEvaluationRunContract,
+    logs: datasetEvaluationLogsContract,
+    runDetail: datasetEvaluationRunDetailContract,
+    cancelRun: cancelDatasetEvaluationRunContract,
+    metrics: datasetEvaluationMetricsContract,
+    file: datasetEvaluationFileContract,
+  },
   plugins: {
     checkInstalled: pluginCheckInstalledContract,
     latestVersions: pluginLatestVersionsContract,
diff --git a/web/service/use-evaluation.ts b/web/service/use-evaluation.ts
new file mode 100644
index 0000000000..13b5086f49
--- /dev/null
+++ b/web/service/use-evaluation.ts
@@ -0,0 +1,222 @@
+import type {
+  EvaluationConfigData,
+  EvaluationNodeInfoRequest,
+  EvaluationTargetType,
+} from '@/types/evaluation'
+import { useMutation, useQuery, useQueryClient } from '@tanstack/react-query'
+import { consoleQuery } from '@/service/client'
+
+export type EvaluationResourceType = 'workflow' | 'pipeline' | 'snippet' // 'pipeline' selects the datasetEvaluation query key in invalidateEvaluationQueries.
+
+type EvaluationLogsParams = { // Optional pagination passed through as the `query` input of the logs hooks.
+  page?: number
+  page_size?: number
+}
+
+type EvaluationRunDetailParams = { // Same shape as EvaluationLogsParams; used by the run-detail hooks.
+  page?: number
+  page_size?: number
+}
+
+const toEvaluationTargetType = (resourceType: Exclude): EvaluationTargetType => { // NOTE(review): bare `Exclude` here and in the hooks below — presumably Exclude<EvaluationResourceType, 'pipeline'>; confirm.
+  return resourceType === 'snippet' ? 'snippets' : 'app' // 'snippet' maps to 'snippets'; any other accepted value maps to 'app'.
+}
+
+const invalidateEvaluationQueries = async ( // Invalidates every query under one of the two evaluation key prefixes.
+  queryClient: ReturnType, // NOTE(review): bare `ReturnType` — presumably ReturnType<typeof useQueryClient>; confirm.
+  resourceType: EvaluationResourceType,
+) => {
+  const queryKey = resourceType === 'pipeline'
+    ? consoleQuery.datasetEvaluation.key()
+    : consoleQuery.evaluation.key()
+
+  await queryClient.invalidateQueries({ queryKey })
+}
+
+export const useEvaluationConfig = (resourceType: Exclude, resourceId: string) => { // Query for evaluation.config; disabled while resourceId is falsy.
+  return useQuery(consoleQuery.evaluation.config.queryOptions({
+    input: {
+      params: {
+        targetType: toEvaluationTargetType(resourceType),
+        targetId: resourceId,
+      },
+    },
+    enabled: !!resourceId,
+  }))
+}
+
+export const useDatasetEvaluationConfig = (datasetId: string) => { // Query for datasetEvaluation.config; disabled while datasetId is falsy.
+  return useQuery(consoleQuery.datasetEvaluation.config.queryOptions({
+    input: {
+      params: { datasetId },
+    },
+    enabled: !!datasetId,
+  }))
+}
+
+export const useEvaluationLogs = ( // Query for evaluation.logs; `params` is forwarded as the query input.
+  resourceType: Exclude,
+  resourceId: string,
+  params: EvaluationLogsParams = {},
+) => {
+  return useQuery(consoleQuery.evaluation.logs.queryOptions({
+    input: {
+      params: {
+        targetType: toEvaluationTargetType(resourceType),
+        targetId: resourceId,
+      },
+      query: params,
+    },
+    enabled: !!resourceId,
+  }))
+}
+
+export const useDatasetEvaluationLogs = (datasetId: string, params: EvaluationLogsParams = {}) => { // Query for datasetEvaluation.logs; disabled while datasetId is falsy.
+  return useQuery(consoleQuery.datasetEvaluation.logs.queryOptions({
+    input: {
+      params: { datasetId },
+      query: params,
+    },
+    enabled: !!datasetId,
+  }))
+}
+
+export const useEvaluationMetrics = (resourceType: Exclude, resourceId: string) => { // Query for evaluation.metrics; disabled while resourceId is falsy.
+  return useQuery(consoleQuery.evaluation.metrics.queryOptions({
+    input: {
+      params: {
+        targetType: toEvaluationTargetType(resourceType),
+        targetId: resourceId,
+      },
+    },
+    enabled: !!resourceId,
+  }))
+}
+
+export const useDatasetEvaluationMetrics = (datasetId: string) => { // Query for datasetEvaluation.metrics; disabled while datasetId is falsy.
+  return useQuery(consoleQuery.datasetEvaluation.metrics.queryOptions({
+    input: {
+      params: { datasetId },
+    },
+    enabled: !!datasetId,
+  }))
+}
+
+export const useAvailableEvaluationMetrics = (enabled = true) => { // Query for evaluation.availableMetrics; takes no input, caller controls `enabled`.
+  return useQuery(consoleQuery.evaluation.availableMetrics.queryOptions({
+    enabled,
+  }))
+}
+
+export const useEvaluationNodeInfoMutation = () => { // Mutation for evaluation.nodeInfo; unlike the mutations below it invalidates nothing.
+  return useMutation(consoleQuery.evaluation.nodeInfo.mutationOptions())
+}
+
+export const useSaveEvaluationConfigMutation = (resourceType: Exclude) => { // Mutation for evaluation.saveConfig; invalidates evaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.evaluation.saveConfig.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, resourceType) // NOTE(review): if 'pipeline' is excluded from resourceType this always picks consoleQuery.evaluation.key() — confirm the parameter is still needed.
+      },
+    }),
+  })
+}
+
+export const useSaveDatasetEvaluationConfigMutation = () => { // Mutation for datasetEvaluation.saveConfig; invalidates datasetEvaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.datasetEvaluation.saveConfig.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, 'pipeline') // 'pipeline' selects consoleQuery.datasetEvaluation.key().
+      },
+    }),
+  })
+}
+
+export const useStartEvaluationRunMutation = (resourceType: Exclude) => { // Mutation for evaluation.startRun; invalidates evaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.evaluation.startRun.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, resourceType)
+      },
+    }),
+  })
+}
+
+export const useStartDatasetEvaluationRunMutation = () => { // Mutation for datasetEvaluation.startRun; invalidates datasetEvaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.datasetEvaluation.startRun.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, 'pipeline')
+      },
+    }),
+  })
+}
+
+export const useCancelEvaluationRunMutation = (resourceType: Exclude) => { // Mutation for evaluation.cancelRun; invalidates evaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.evaluation.cancelRun.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, resourceType)
+      },
+    }),
+  })
+}
+
+export const useCancelDatasetEvaluationRunMutation = () => { // Mutation for datasetEvaluation.cancelRun; invalidates datasetEvaluation queries on success.
+  const queryClient = useQueryClient()
+
+  return useMutation({
+    ...consoleQuery.datasetEvaluation.cancelRun.mutationOptions({
+      onSuccess: async () => {
+        await invalidateEvaluationQueries(queryClient, 'pipeline')
+      },
+    }),
+  })
+}
+
+export const useEvaluationRunDetail = ( // Query for evaluation.runDetail; disabled until both resourceId and runId are truthy.
+  resourceType: Exclude,
+  resourceId: string,
+  runId: string,
+  params: EvaluationRunDetailParams = {},
+) => {
+  return useQuery(consoleQuery.evaluation.runDetail.queryOptions({
+    input: {
+      params: {
+        targetType: toEvaluationTargetType(resourceType),
+        targetId: resourceId,
+        runId,
+      },
+      query: params,
+    },
+    enabled: !!resourceId && !!runId,
+  }))
+}
+
+export const useDatasetEvaluationRunDetail = (datasetId: string, runId: string, params: EvaluationRunDetailParams = {}) => { // Query for datasetEvaluation.runDetail; disabled until both ids are truthy.
+  return useQuery(consoleQuery.datasetEvaluation.runDetail.queryOptions({
+    input: {
+      params: {
+        datasetId,
+        runId,
+      },
+      query: params,
+    },
+    enabled: !!datasetId && !!runId,
+  }))
+}
+
+export type { // Re-exports two of the types imported from '@/types/evaluation'.
+  EvaluationConfigData,
+  EvaluationNodeInfoRequest,
+}
diff --git a/web/types/evaluation.ts b/web/types/evaluation.ts
new file mode 100644
index 0000000000..4f4e097614
--- /dev/null
+++ b/web/types/evaluation.ts
@@ -0,0 +1,126 @@
+export type EvaluationTargetType = 'app' | 'snippets' // Used as the {targetType} path parameter of the target-scoped contracts.
+
+export type EvaluationConfig = { // Every field is required but nullable.
+  evaluation_model: string | null
+  evaluation_model_provider: string | null
+  metrics_config: Record | null // NOTE(review): bare `Record` here and below — key/value type arguments look lost in extraction; confirm.
+  judgement_conditions: Record | null // NOTE(review): spelled `judgement_` here but `judgment_config` in EvaluationConfigData — confirm both match the API.
+}
+
+export type NodeInfo = {
+  node_id: string
+  type: string
+  title: string
+}
+
+export type EvaluationDefaultMetric = { // Both fields are optional.
+  metric?: string
+  node_info_list?: NodeInfo[]
+}
+
+export type EvaluationCustomizedMetric = { // All fields are optional.
+  evaluation_workflow_id?: string
+  input_fields?: Record
+  output_fields?: Record[]
+}
+
+export type EvaluationConfigData = { // Body type of the saveConfig contracts; all fields are optional.
+  evaluation_model?: string
+  evaluation_model_provider?: string
+  default_metrics?: EvaluationDefaultMetric[]
+  customized_metrics?: EvaluationCustomizedMetric | null
+  judgment_config?: Record | null
+}
+
+export type EvaluationRunRequest = EvaluationConfigData & { // Body type of the startRun contracts: config data plus a required file_id.
+  file_id: string
+}
+
+export type EvaluationRunStatus = 'pending' | 'running' | 'completed' | 'failed' | 'cancelled'
+
+export type EvaluationRun = {
+  id: string
+  tenant_id: string
+  target_type: string // Typed as plain string, not EvaluationTargetType.
+  target_id: string
+  evaluation_config_id: string
+  status: EvaluationRunStatus
+  dataset_file_id: string | null
+  result_file_id: string | null
+  total_items: number
+  completed_items: number
+  failed_items: number
+  progress: number // NOTE(review): range/unit not visible here (fraction vs percent) — confirm.
+  metrics_summary: Record
+  error: string | null
+  created_by: string
+  started_at: number | null // NOTE(review): timestamp unit not visible here — confirm.
+  completed_at: number | null
+  created_at: number
+}
+
+export type EvaluationRunMetric = { // All fields are optional.
+  name?: string
+  value?: unknown
+  details?: Record
+}
+
+export type EvaluationRunItem = {
+  id: string
+  item_index: number
+  inputs: Record
+  expected_output: string | null
+  actual_output: string | null
+  metrics: EvaluationRunMetric[]
+  judgment: Record
+  metadata: Record
+  error: string | null
+  overall_score: number | null
+}
+
+export type EvaluationLogsResponse = { // One page of EvaluationRun entries plus paging fields.
+  data: EvaluationRun[]
+  total: number
+  page: number
+  page_size: number
+}
+
+export type EvaluationRunItemsPagination = { // One page of EvaluationRunItem entries plus paging fields.
+  data: EvaluationRunItem[]
+  total: number
+  page: number
+  page_size: number
+}
+
+export type EvaluationRunDetailResponse = { // A run together with one page of its items.
+  run: EvaluationRun
+  items: EvaluationRunItemsPagination
+}
+
+export type EvaluationMetricsMapResponse = {
+  metrics: Record
+}
+
+export type EvaluationMetricsListResponse = {
+  metrics: string[]
+}
+
+export type EvaluationNodeInfoRequest = { // Body type of evaluationNodeInfoContract.
+  metrics?: string[]
+}
+
+export type EvaluationNodeInfoResponse = Record
+
+export type EvaluationFileInfo = {
+  id: string
+  name: string
+  size: number
+  extension: string
+  mime_type: string
+  created_at: number
+  download_url: string
+}
+
+export type EvaluationVersionDetailResponse = {
+  graph: Record
+}