// Provides defined Message types for formatting data assigned to
// protobuf.Value fields in AI Platform.
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1.schema;

import "google/cloud/aiplatform/v1beta1/schema/annotation_spec_color.proto";
import "google/cloud/aiplatform/v1beta1/schema/geometry.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/api/annotations.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1/schema;schema";
option java_multiple_files = true;
option java_outer_classname = "IoFormatProto";
option java_package = "com.google.cloud.aiplatform.v1beta1.schema";

// Prediction input format for Image Classification.
message ImageClassificationPredictionInstance {
  // The image bytes or GCS URI to make the prediction on.
  string content = 1;

  // The MIME type of the content of the image. Only the images in below listed
  // MIME types are supported.
  // - image/jpeg
  // - image/gif
  // - image/png
  // - image/webp
  // - image/bmp
  // - image/tiff
  // - image/vnd.microsoft.icon
  string mime_type = 2;
}

// Prediction input format for Image Object Detection.
message ImageObjectDetectionPredictionInstance {
  // The image bytes or GCS URI to make the prediction on.
  string content = 1;

  // The MIME type of the content of the image. Only the images in below listed
  // MIME types are supported.
  // - image/jpeg
  // - image/gif
  // - image/png
  // - image/webp
  // - image/bmp
  // - image/tiff
  // - image/vnd.microsoft.icon
  string mime_type = 2;
}

// Prediction input format for Image Segmentation.
message ImageSegmentationPredictionInstance {
  // The image bytes to make the predictions on.
  string content = 1;

  // The MIME type of the content of the image. Only the images in below listed
  // MIME types are supported.
  // - image/jpeg
  // - image/png
  string mime_type = 2;
}

// Prediction input format for Video Classification.
message VideoClassificationPredictionInstance {
  // The Google Cloud Storage location of the video on which to perform the
  // prediction.
  string content = 1;

  // The MIME type of the content of the video. Only the following are
  // supported: video/mp4 video/avi video/quicktime
  string mime_type = 2;

  // The beginning, inclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision.
  string time_segment_start = 3;

  // The end, exclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision, and "Infinity" is allowed, which means the
  // end of the video.
  string time_segment_end = 4;
}

// Prediction input format for Video Object Tracking.
message VideoObjectTrackingPredictionInstance {
  // The Google Cloud Storage location of the video on which to perform the
  // prediction.
  string content = 1;

  // The MIME type of the content of the video. Only the following are
  // supported: video/mp4 video/avi video/quicktime
  string mime_type = 2;

  // The beginning, inclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision.
  string time_segment_start = 3;

  // The end, exclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision, and "Infinity" is allowed, which means the
  // end of the video.
  string time_segment_end = 4;
}

// Prediction input format for Video Action Recognition.
message VideoActionRecognitionPredictionInstance {
  // The Google Cloud Storage location of the video on which to perform the
  // prediction.
  string content = 1;

  // The MIME type of the content of the video. Only the following are
  // supported: video/mp4 video/avi video/quicktime
  string mime_type = 2;

  // The beginning, inclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision.
  string time_segment_start = 3;

  // The end, exclusive, of the video's time segment on which to perform
  // the prediction. Expressed as a number of seconds as measured from the
  // start of the video, with "s" appended at the end. Fractions are allowed,
  // up to a microsecond precision, and "Infinity" is allowed, which means the
  // end of the video.
  string time_segment_end = 4;
}

// Prediction input format for Text Classification.
message TextClassificationPredictionInstance {
  // The text snippet to make the predictions on.
  string content = 1;

  // The MIME type of the text snippet. The supported MIME types are listed
  // below.
  // - text/plain
  string mime_type = 2;
}

// Prediction input format for Text Sentiment.
message TextSentimentPredictionInstance {
  // The text snippet to make the predictions on.
  string content = 1;

  // The MIME type of the text snippet. The supported MIME types are listed
  // below.
  // - text/plain
  string mime_type = 2;
}

// Prediction input format for Text Extraction.
message TextExtractionPredictionInstance {
  // The text snippet to make the predictions on.
  string content = 1;

  // The MIME type of the text snippet. The supported MIME types are listed
  // below.
  // - text/plain
  string mime_type = 2;

  // This field is only used for batch prediction. If a key is provided, the
  // batch prediction result will by mapped to this key. If omitted, then the
  // batch prediction result will contain the entire input instance. AI Platform
  // will not check if keys in the request are duplicates, so it is up to the
  // caller to ensure the keys are unique.
  string key = 3;
}

// Prediction model parameters for Image Classification.
message ImageClassificationPredictionParams {
  // The Model only returns predictions with at least this confidence score.
  // Default value is 0.0
  float confidence_threshold = 1;

  // The Model only returns up to that many top, by confidence score,
  // predictions per instance. If this number is very high, the Model may return
  // fewer predictions. Default value is 10.
  int32 max_predictions = 2;
}

// Prediction model parameters for Image Object Detection.
message ImageObjectDetectionPredictionParams {
  // The Model only returns predictions with at least this confidence score.
  // Default value is 0.0
  float confidence_threshold = 1;

  // The Model only returns up to that many top, by confidence score,
  // predictions per instance. Note that number of returned predictions is also
  // limited by metadata's predictionsLimit. Default value is 10.
  int32 max_predictions = 2;
}

// Prediction model parameters for Image Segmentation.
message ImageSegmentationPredictionParams {
  // When the model predicts category of pixels of the image, it will only
  // provide predictions for pixels that it is at least this much confident
  // about. All other pixels will be classified as background. Default value is
  // 0.5.
  float confidence_threshold = 1;
}

// Prediction model parameters for Video Classification.
message VideoClassificationPredictionParams {
  // The Model only returns predictions with at least this confidence score.
  // Default value is 0.0
  float confidence_threshold = 1;

  // The Model only returns up to that many top, by confidence score,
  // predictions per instance. If this number is very high, the Model may return
  // fewer predictions. Default value is 10,000.
  int32 max_predictions = 2;

  // Set to true to request segment-level classification. AI Platform returns
  // labels and their confidence scores for the entire time segment of the
  // video that user specified in the input instance.
  // Default value is true
  bool segment_classification = 3;

  // Set to true to request shot-level classification. AI Platform determines
  // the boundaries for each camera shot in the entire time segment of the
  // video that user specified in the input instance. AI Platform then
  // returns labels and their confidence scores for each detected shot, along
  // with the start and end time of the shot.
  // WARNING: Model evaluation is not done for this classification type,
  // the quality of it depends on the training data, but there are no metrics
  // provided to describe that quality.
  // Default value is false
  bool shot_classification = 4;

  // Set to true to request classification for a video at one-second intervals.
  // AI Platform returns labels and their confidence scores for each second of
  // the entire time segment of the video that user specified in the input
  // instance.
  // WARNING: Model evaluation is not done for this classification type, the
  // quality of it depends on the training data, but there are no metrics
  // provided to describe that quality. Default value is false
  bool one_sec_interval_classification = 5;
}

// Prediction model parameters for Video Object Tracking.
message VideoObjectTrackingPredictionParams {
  // The Model only returns predictions with at least this confidence score.
  // Default value is 0.0
  float confidence_threshold = 1;

  // The model only returns up to that many top, by confidence score,
  // predictions per frame of the video. If this number is very high, the
  // Model may return fewer predictions per frame. Default value is 50.
  int32 max_predictions = 2;

  // Only bounding boxes with shortest edge at least that long as a relative
  // value of video frame size are returned. Default value is 0.0.
  float min_bounding_box_size = 3;
}

// Prediction model parameters for Video Action Recognition.
message VideoActionRecognitionPredictionParams {
  // The Model only returns predictions with at least this confidence score.
  // Default value is 0.0
  float confidence_threshold = 1;

  // The model only returns up to that many top, by confidence score,
  // predictions per frame of the video. If this number is very high, the
  // Model may return fewer predictions per frame. Default value is 50.
  int32 max_predictions = 2;
}

// Represents a line of JSONL in the batch prediction output file.
message PredictionResult {
  // Some identifier from the input so that the prediction can be mapped back to
  // the input instance.
  oneof input {
    // User's input instance.
    // Struct is used here instead of Any so that JsonFormat does not append an
    // extra "@type" field when we convert the proto to JSON.
    google.protobuf.Struct instance = 1;

    // Optional user-provided key from the input instance.
    string key = 2;
  }

  // The prediction result.
  // Value is used here instead of Any so that JsonFormat does not append an
  // extra "@type" field when we convert the proto to JSON and so we can
  // represent array of objects.
  google.protobuf.Value prediction = 3;
}

// Represents a line of JSONL in the text sentiment batch prediction output
// file. This is a hack to allow printing of integer values.
message TextSentimentPredictionResult {
  // Prediction output format for Text Sentiment.
  message Prediction {
    // The integer sentiment labels between 0 (inclusive) and sentimentMax label
    // (inclusive), while 0 maps to the least positive sentiment and
    // sentimentMax maps to the most positive one. The higher the score is, the
    // more positive the sentiment in the text snippet is. Note: sentimentMax is
    // an integer value between 1 (inclusive) and 10 (inclusive).
    int32 sentiment = 1;
  }

  // User's input instance.
  TextSentimentPredictionInstance instance = 1;

  // The prediction result.
  Prediction prediction = 2;
}

// Prediction output format for Image Classification.
message ClassificationPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified, ordered
  // by the confidence score descendingly.
  // NOTE: ids, display_names and confidences are parallel arrays zipped by
  // index.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified, order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in correctness of the predicted IDs, higher value
  // means higher confidence. Order matches the Ids.
  repeated float confidences = 3;
}

// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified, ordered
  // by the confidence score descendingly.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified, order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in correctness of the predicted IDs, higher value
  // means higher confidence. Order matches the Ids.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image, that pinpoint
  // the found AnnotationSpecs. Given in order that matches the IDs. Each
  // bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
  // `yMax`, which represent the extremal coordinates of the box. They are
  // relative to the image size, and the point 0,0 is in the top left
  // of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}

// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
  // The resource ID of the AnnotationSpec that had been identified.
  string id = 1;

  // The display name of the AnnotationSpec that had been identified.
  string display_name = 2;

  // The type of the prediction. The requested types can be configured
  // via parameters. This will be one of
  // - segment-classification
  // - shot-classification
  // - one-sec-interval-classification
  string type = 3;

  // The beginning, inclusive, of the video's time segment in which the
  // AnnotationSpec has been identified. Expressed as a number of seconds as
  // measured from the start of the video, with fractions up to a microsecond
  // precision, and with "s" appended at the end. Note that for
  // 'segment-classification' prediction type, this equals the original
  // 'timeSegmentStart' from the input instance, for other types it is the
  // start of a shot or a 1 second interval respectively.
  google.protobuf.Duration time_segment_start = 4;

  // The end, exclusive, of the video's time segment in which the
  // AnnotationSpec has been identified. Expressed as a number of seconds as
  // measured from the start of the video, with fractions up to a microsecond
  // precision, and with "s" appended at the end. Note that for
  // 'segment-classification' prediction type, this equals the original
  // 'timeSegmentEnd' from the input instance, for other types it is the end
  // of a shot or a 1 second interval respectively.
  google.protobuf.Duration time_segment_end = 5;

  // The Model's confidence in correction of this prediction, higher
  // value means higher confidence.
  google.protobuf.FloatValue confidence = 6;
}

// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // The fields `xMin`, `xMax`, `yMin`, and `yMax` refer to a bounding box,
  // i.e. the rectangle over the video frame pinpointing the found
  // AnnotationSpec. The coordinates are relative to the frame size, and the
  // point 0,0 is in the top left of the frame.
  message Frame {
    // A time (frame) of a video in which the object has been detected.
    // Expressed as a number of seconds as measured from the
    // start of the video, with fractions up to a microsecond precision, and
    // with "s" appended at the end.
    google.protobuf.Duration time_offset = 1;

    // The leftmost coordinate of the bounding box.
    google.protobuf.FloatValue x_min = 2;

    // The rightmost coordinate of the bounding box.
    google.protobuf.FloatValue x_max = 3;

    // The topmost coordinate of the bounding box.
    google.protobuf.FloatValue y_min = 4;

    // The bottommost coordinate of the bounding box.
    google.protobuf.FloatValue y_max = 5;
  }

  // The resource ID of the AnnotationSpec that had been identified.
  string id = 1;

  // The display name of the AnnotationSpec that had been identified.
  string display_name = 2;

  // The beginning, inclusive, of the video's time segment in which the
  // object instance has been detected. Expressed as a number of seconds as
  // measured from the start of the video, with fractions up to a microsecond
  // precision, and with "s" appended at the end.
  google.protobuf.Duration time_segment_start = 3;

  // The end, inclusive, of the video's time segment in which the
  // object instance has been detected. Expressed as a number of seconds as
  // measured from the start of the video, with fractions up to a microsecond
  // precision, and with "s" appended at the end.
  google.protobuf.Duration time_segment_end = 4;

  // The Model's confidence in correction of this prediction, higher
  // value means higher confidence.
  google.protobuf.FloatValue confidence = 5;

  // All of the frames of the video in which a single object instance has been
  // detected. The bounding boxes in the frames identify the same object.
  repeated Frame frames = 6;
}

// Prediction output format for Text Extraction.
message TextExtractionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified,
  // ordered by the confidence score descendingly.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified,
  // order matches the IDs.
  repeated string display_names = 2;

  // The start offsets, inclusive, of the text segment in which the
  // AnnotationSpec has been identified. Expressed as a zero-based number
  // of characters as measured from the start of the text snippet.
  repeated int64 text_segment_start_offsets = 3;

  // The end offsets, inclusive, of the text segment in which the
  // AnnotationSpec has been identified. Expressed as a zero-based number
  // of characters as measured from the start of the text snippet.
  repeated int64 text_segment_end_offsets = 4;

  // The Model's confidences in correctness of the predicted IDs, higher
  // value means higher confidence. Order matches the Ids.
  repeated float confidences = 5;
}