googleapis/google/cloud/aiplatform/v1beta1/schema/io_format.proto
Google APIs, commit e3e7e7ddb0 — feat: added prediction and training proto files for enhanced AI Platform client libraries.
Provides defined Message types for formatting data assigned to protobuf.Value fields in AI Platform.

PiperOrigin-RevId: 342967619
2020-11-17 16:11:56 -08:00


// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.aiplatform.v1beta1.schema;
import "google/cloud/aiplatform/v1beta1/schema/annotation_spec_color.proto";
import "google/cloud/aiplatform/v1beta1/schema/geometry.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/api/annotations.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/aiplatform/v1beta1/schema;schema";
option java_multiple_files = true;
option java_outer_classname = "IoFormatProto";
option java_package = "com.google.cloud.aiplatform.v1beta1.schema";
// Prediction input format for Image Classification.
message ImageClassificationPredictionInstance {
// The image bytes or GCS URI to make the prediction on.
string content = 1;
// The MIME type of the content of the image. Only images in the MIME types
// listed below are supported.
// - image/jpeg
// - image/gif
// - image/png
// - image/webp
// - image/bmp
// - image/tiff
// - image/vnd.microsoft.icon
string mime_type = 2;
}
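// Illustrative example (an assumption, not part of the source schema): a JSON
// instance conforming to ImageClassificationPredictionInstance, using the
// proto3 JSON field names, might look like:
//
//   {"content": "gs://my-bucket/images/daisy.jpg", "mimeType": "image/jpeg"}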
// Prediction input format for Image Object Detection.
message ImageObjectDetectionPredictionInstance {
// The image bytes or GCS URI to make the prediction on.
string content = 1;
// The MIME type of the content of the image. Only images in the MIME types
// listed below are supported.
// - image/jpeg
// - image/gif
// - image/png
// - image/webp
// - image/bmp
// - image/tiff
// - image/vnd.microsoft.icon
string mime_type = 2;
}
// Prediction input format for Image Segmentation.
message ImageSegmentationPredictionInstance {
// The image bytes to make the predictions on.
string content = 1;
// The MIME type of the content of the image. Only images in the MIME types
// listed below are supported.
// - image/jpeg
// - image/png
string mime_type = 2;
}
// Prediction input format for Video Classification.
message VideoClassificationPredictionInstance {
// The Google Cloud Storage location of the video on which to perform the
// prediction.
string content = 1;
// The MIME type of the content of the video. Only the following are
// supported:
// - video/mp4
// - video/avi
// - video/quicktime
string mime_type = 2;
// The beginning, inclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision.
string time_segment_start = 3;
// The end, exclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision, and "Infinity" is allowed, which means the
// end of the video.
string time_segment_end = 4;
}
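// Illustrative example (an assumption, not part of the source schema): a JSON
// instance conforming to VideoClassificationPredictionInstance might look
// like:
//
//   {
//     "content": "gs://my-bucket/videos/clip.mp4",
//     "mimeType": "video/mp4",
//     "timeSegmentStart": "0.0s",
//     "timeSegmentEnd": "Infinity"
//   }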
// Prediction input format for Video Object Tracking.
message VideoObjectTrackingPredictionInstance {
// The Google Cloud Storage location of the video on which to perform the
// prediction.
string content = 1;
// The MIME type of the content of the video. Only the following are
// supported:
// - video/mp4
// - video/avi
// - video/quicktime
string mime_type = 2;
// The beginning, inclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision.
string time_segment_start = 3;
// The end, exclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision, and "Infinity" is allowed, which means the
// end of the video.
string time_segment_end = 4;
}
// Prediction input format for Video Action Recognition.
message VideoActionRecognitionPredictionInstance {
// The Google Cloud Storage location of the video on which to perform the
// prediction.
string content = 1;
// The MIME type of the content of the video. Only the following are
// supported:
// - video/mp4
// - video/avi
// - video/quicktime
string mime_type = 2;
// The beginning, inclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision.
string time_segment_start = 3;
// The end, exclusive, of the video's time segment on which to perform
// the prediction. Expressed as a number of seconds as measured from the
// start of the video, with "s" appended at the end. Fractions are allowed,
// up to a microsecond precision, and "Infinity" is allowed, which means the
// end of the video.
string time_segment_end = 4;
}
// Prediction input format for Text Classification.
message TextClassificationPredictionInstance {
// The text snippet to make the predictions on.
string content = 1;
// The MIME type of the text snippet. The supported MIME types are listed
// below.
// - text/plain
string mime_type = 2;
}
// Prediction input format for Text Sentiment.
message TextSentimentPredictionInstance {
// The text snippet to make the predictions on.
string content = 1;
// The MIME type of the text snippet. The supported MIME types are listed
// below.
// - text/plain
string mime_type = 2;
}
// Prediction input format for Text Extraction.
message TextExtractionPredictionInstance {
// The text snippet to make the predictions on.
string content = 1;
// The MIME type of the text snippet. The supported MIME types are listed
// below.
// - text/plain
string mime_type = 2;
// This field is only used for batch prediction. If a key is provided, the
// batch prediction result will be mapped to this key. If omitted, then the
// batch prediction result will contain the entire input instance. AI Platform
// does not check whether keys in the request are duplicates, so it is up to
// the caller to ensure the keys are unique.
string key = 3;
}
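// Illustrative example (an assumption, not part of the source schema): a
// batch prediction JSON instance with a caller-chosen key might look like:
//
//   {"content": "Wash your hands.", "mimeType": "text/plain", "key": "doc-17"}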
// Prediction model parameters for Image Classification.
message ImageClassificationPredictionParams {
// The Model only returns predictions with at least this confidence score.
// Default value is 0.0.
float confidence_threshold = 1;
// The Model returns up to this many of its top predictions, ranked by
// confidence score, per instance. If this number is very high, the Model may
// return fewer predictions. Default value is 10.
int32 max_predictions = 2;
}
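// Illustrative example (an assumption, not part of the source schema): JSON
// parameters conforming to ImageClassificationPredictionParams might look
// like:
//
//   {"confidenceThreshold": 0.5, "maxPredictions": 3}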
// Prediction model parameters for Image Object Detection.
message ImageObjectDetectionPredictionParams {
// The Model only returns predictions with at least this confidence score.
// Default value is 0.0.
float confidence_threshold = 1;
// The Model returns up to this many of its top predictions, ranked by
// confidence score, per instance. Note that the number of returned
// predictions is also limited by the metadata's predictionsLimit. Default
// value is 10.
int32 max_predictions = 2;
}
// Prediction model parameters for Image Segmentation.
message ImageSegmentationPredictionParams {
// When the model predicts the category of the image's pixels, it only
// provides predictions for pixels about which it is at least this confident.
// All other pixels are classified as background. Default value is 0.5.
float confidence_threshold = 1;
}
// Prediction model parameters for Video Classification.
message VideoClassificationPredictionParams {
// The Model only returns predictions with at least this confidence score.
// Default value is 0.0.
float confidence_threshold = 1;
// The Model returns up to this many of its top predictions, ranked by
// confidence score, per instance. If this number is very high, the Model may
// return fewer predictions. Default value is 10,000.
int32 max_predictions = 2;
// Set to true to request segment-level classification. AI Platform returns
// labels and their confidence scores for the entire time segment of the
// video that the user specified in the input instance.
// Default value is true.
bool segment_classification = 3;
// Set to true to request shot-level classification. AI Platform determines
// the boundaries for each camera shot in the entire time segment of the
// video that the user specified in the input instance. AI Platform then
// returns labels and their confidence scores for each detected shot, along
// with the start and end time of the shot.
// WARNING: Model evaluation is not done for this classification type;
// its quality depends on the training data, and no metrics are provided to
// describe that quality.
// Default value is false.
bool shot_classification = 4;
// Set to true to request classification for a video at one-second intervals.
// AI Platform returns labels and their confidence scores for each second of
// the entire time segment of the video that the user specified in the input
// instance.
// WARNING: Model evaluation is not done for this classification type; its
// quality depends on the training data, and no metrics are provided to
// describe that quality.
// Default value is false.
bool one_sec_interval_classification = 5;
}
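// Illustrative example (an assumption, not part of the source schema): JSON
// parameters requesting segment- and shot-level video classification might
// look like:
//
//   {
//     "confidenceThreshold": 0.3,
//     "maxPredictions": 100,
//     "segmentClassification": true,
//     "shotClassification": true,
//     "oneSecIntervalClassification": false
//   }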
// Prediction model parameters for Video Object Tracking.
message VideoObjectTrackingPredictionParams {
// The Model only returns predictions with at least this confidence score.
// Default value is 0.0.
float confidence_threshold = 1;
// The Model returns up to this many of its top predictions, ranked by
// confidence score, per frame of the video. If this number is very high, the
// Model may return fewer predictions per frame. Default value is 50.
int32 max_predictions = 2;
// Only bounding boxes whose shortest edge is at least this long, as a
// relative value of the video frame size, are returned. Default value is 0.0.
float min_bounding_box_size = 3;
}
// Prediction model parameters for Video Action Recognition.
message VideoActionRecognitionPredictionParams {
// The Model only returns predictions with at least this confidence score.
// Default value is 0.0.
float confidence_threshold = 1;
// The Model returns up to this many of its top predictions, ranked by
// confidence score, per frame of the video. If this number is very high, the
// Model may return fewer predictions per frame. Default value is 50.
int32 max_predictions = 2;
}
// Represents a line of JSONL in the batch prediction output file.
message PredictionResult {
// Some identifier from the input so that the prediction can be mapped back to
// the input instance.
oneof input {
// User's input instance.
// Struct is used here instead of Any so that JsonFormat does not append an
// extra "@type" field when we convert the proto to JSON.
google.protobuf.Struct instance = 1;
// Optional user-provided key from the input instance.
string key = 2;
}
// The prediction result.
// Value is used here instead of Any so that JsonFormat does not append an
// extra "@type" field when we convert the proto to JSON and so we can
// represent array of objects.
google.protobuf.Value prediction = 3;
}
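// Illustrative example (an assumption, not part of the source schema): one
// JSONL output line carrying a keyed PredictionResult might look like (the
// shape of "prediction" depends on the model type):
//
//   {"key": "doc-17", "prediction": {"ids": ["123"], "confidences": [0.9]}}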
// Represents a line of JSONL in the text sentiment batch prediction output
// file. This is a hack to allow printing of integer values.
message TextSentimentPredictionResult {
// Prediction output format for Text Sentiment.
message Prediction {
// The integer sentiment label, between 0 (inclusive) and the sentimentMax
// label (inclusive), where 0 maps to the least positive sentiment and
// sentimentMax maps to the most positive one. The higher the score, the
// more positive the sentiment in the text snippet. Note: sentimentMax is
// an integer value between 1 (inclusive) and 10 (inclusive).
int32 sentiment = 1;
}
// User's input instance.
TextSentimentPredictionInstance instance = 1;
// The prediction result.
Prediction prediction = 2;
}
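// Illustrative example (an assumption, not part of the source schema): a
// JSONL output line for text sentiment might look like:
//
//   {
//     "instance": {"content": "Great service!", "mimeType": "text/plain"},
//     "prediction": {"sentiment": 4}
//   }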
// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
// The resource IDs of the AnnotationSpecs that have been identified, ordered
// by confidence score in descending order.
repeated int64 ids = 1;
// The display names of the AnnotationSpecs that have been identified; order
// matches the IDs.
repeated string display_names = 2;
// The Model's confidences in the correctness of the predicted IDs; a higher
// value means higher confidence. Order matches the IDs.
repeated float confidences = 3;
}
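// Illustrative example (an assumption, not part of the source schema): a
// classification prediction in JSON might look like (int64 IDs serialize as
// JSON strings under the proto3 JSON mapping):
//
//   {
//     "ids": ["1162372667842363392", "1162372667842363393"],
//     "displayNames": ["daisy", "tulip"],
//     "confidences": [0.92, 0.05]
//   }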
// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
// The resource IDs of the AnnotationSpecs that have been identified, ordered
// by confidence score in descending order.
repeated int64 ids = 1;
// The display names of the AnnotationSpecs that have been identified; order
// matches the IDs.
repeated string display_names = 2;
// The Model's confidences in the correctness of the predicted IDs; a higher
// value means higher confidence. Order matches the IDs.
repeated float confidences = 3;
// Bounding boxes, i.e. the rectangles over the image, that pinpoint
// the found AnnotationSpecs. Given in order that matches the IDs. Each
// bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
// `yMax`, which represent the extremal coordinates of the box. They are
// relative to the image size, and the point 0,0 is in the top left
// of the image.
repeated google.protobuf.ListValue bboxes = 4;
}
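// Illustrative example (an assumption, not part of the source schema): an
// object detection prediction in JSON, where each bounding box is
// [xMin, xMax, yMin, yMax], might look like:
//
//   {
//     "ids": ["370"],
//     "displayNames": ["cat"],
//     "confidences": [0.87],
//     "bboxes": [[0.1, 0.4, 0.2, 0.5]]
//   }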
// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
// The resource ID of the AnnotationSpec that has been identified.
string id = 1;
// The display name of the AnnotationSpec that has been identified.
string display_name = 2;
// The type of the prediction. The requested types can be configured
// via parameters. This will be one of
// - segment-classification
// - shot-classification
// - one-sec-interval-classification
string type = 3;
// The beginning, inclusive, of the video's time segment in which the
// AnnotationSpec has been identified. Expressed as a number of seconds as
// measured from the start of the video, with fractions up to a microsecond
// precision, and with "s" appended at the end. Note that for
// 'segment-classification' prediction type, this equals the original
// 'timeSegmentStart' from the input instance, for other types it is the
// start of a shot or a 1 second interval respectively.
google.protobuf.Duration time_segment_start = 4;
// The end, exclusive, of the video's time segment in which the
// AnnotationSpec has been identified. Expressed as a number of seconds as
// measured from the start of the video, with fractions up to a microsecond
// precision, and with "s" appended at the end. Note that for
// 'segment-classification' prediction type, this equals the original
// 'timeSegmentEnd' from the input instance, for other types it is the end
// of a shot or a 1 second interval respectively.
google.protobuf.Duration time_segment_end = 5;
// The Model's confidence in the correctness of this prediction; a higher
// value means higher confidence.
google.protobuf.FloatValue confidence = 6;
}
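// Illustrative example (an assumption, not part of the source schema): a
// shot-level video classification prediction in JSON might look like:
//
//   {
//     "id": "77",
//     "displayName": "cooking",
//     "type": "shot-classification",
//     "timeSegmentStart": "12.4s",
//     "timeSegmentEnd": "18.05s",
//     "confidence": 0.81
//   }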
// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
// The fields `xMin`, `xMax`, `yMin`, and `yMax` refer to a bounding box,
// i.e. the rectangle over the video frame pinpointing the found
// AnnotationSpec. The coordinates are relative to the frame size, and the
// point 0,0 is in the top left of the frame.
message Frame {
// A time (frame) of a video in which the object has been detected.
// Expressed as a number of seconds as measured from the
// start of the video, with fractions up to a microsecond precision, and
// with "s" appended at the end.
google.protobuf.Duration time_offset = 1;
// The leftmost coordinate of the bounding box.
google.protobuf.FloatValue x_min = 2;
// The rightmost coordinate of the bounding box.
google.protobuf.FloatValue x_max = 3;
// The topmost coordinate of the bounding box.
google.protobuf.FloatValue y_min = 4;
// The bottommost coordinate of the bounding box.
google.protobuf.FloatValue y_max = 5;
}
// The resource ID of the AnnotationSpec that has been identified.
string id = 1;
// The display name of the AnnotationSpec that has been identified.
string display_name = 2;
// The beginning, inclusive, of the video's time segment in which the
// object instance has been detected. Expressed as a number of seconds as
// measured from the start of the video, with fractions up to a microsecond
// precision, and with "s" appended at the end.
google.protobuf.Duration time_segment_start = 3;
// The end, inclusive, of the video's time segment in which the
// object instance has been detected. Expressed as a number of seconds as
// measured from the start of the video, with fractions up to a microsecond
// precision, and with "s" appended at the end.
google.protobuf.Duration time_segment_end = 4;
// The Model's confidence in the correctness of this prediction; a higher
// value means higher confidence.
google.protobuf.FloatValue confidence = 5;
// All of the frames of the video in which a single object instance has been
// detected. The bounding boxes in the frames identify the same object.
repeated Frame frames = 6;
}
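// Illustrative example (an assumption, not part of the source schema): a
// video object tracking prediction in JSON, with two frames of the same
// tracked object, might look like:
//
//   {
//     "id": "42",
//     "displayName": "car",
//     "timeSegmentStart": "1.0s",
//     "timeSegmentEnd": "1.5s",
//     "confidence": 0.9,
//     "frames": [
//       {"timeOffset": "1.0s",
//        "xMin": 0.10, "xMax": 0.30, "yMin": 0.2, "yMax": 0.4},
//       {"timeOffset": "1.5s",
//        "xMin": 0.12, "xMax": 0.32, "yMin": 0.2, "yMax": 0.4}
//     ]
//   }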
// Prediction output format for Text Extraction.
message TextExtractionPredictionResult {
// The resource IDs of the AnnotationSpecs that have been identified,
// ordered by confidence score in descending order.
repeated int64 ids = 1;
// The display names of the AnnotationSpecs that have been identified;
// order matches the IDs.
repeated string display_names = 2;
// The start offsets, inclusive, of the text segment in which the
// AnnotationSpec has been identified. Expressed as a zero-based number
// of characters as measured from the start of the text snippet.
repeated int64 text_segment_start_offsets = 3;
// The end offsets, inclusive, of the text segment in which the
// AnnotationSpec has been identified. Expressed as a zero-based number
// of characters as measured from the start of the text snippet.
repeated int64 text_segment_end_offsets = 4;
// The Model's confidences in the correctness of the predicted IDs; a higher
// value means higher confidence. Order matches the IDs.
repeated float confidences = 5;
}
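// Illustrative example (an assumption, not part of the source schema): a
// text extraction prediction in JSON, where offsets index into the input
// text snippet (int64 values serialize as JSON strings under the proto3
// JSON mapping), might look like:
//
//   {
//     "ids": ["512"],
//     "displayNames": ["PERSON"],
//     "textSegmentStartOffsets": ["0"],
//     "textSegmentEndOffsets": ["9"],
//     "confidences": [0.95]
//   }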