Synchronize new proto/yaml changes.
PiperOrigin-RevId: 278627678
This commit is contained in:
parent
8c6569ced0
commit
f06bab1c11
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright 2018 Google LLC.
|
||||
// Copyright 2019 Google LLC.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
|
@ -18,6 +18,8 @@ syntax = "proto3";
|
|||
package google.cloud.videointelligence.v1p3beta1;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
import "google/api/client.proto";
|
||||
import "google/api/field_behavior.proto";
|
||||
import "google/longrunning/operations.proto";
|
||||
import "google/protobuf/duration.proto";
|
||||
import "google/protobuf/timestamp.proto";
|
||||
|
|
@ -29,10 +31,13 @@ option java_multiple_files = true;
|
|||
option java_outer_classname = "VideoIntelligenceServiceProto";
|
||||
option java_package = "com.google.cloud.videointelligence.v1p3beta1";
|
||||
option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1p3beta1";
|
||||
option ruby_package = "Google::Cloud::VideoIntelligence::V1p3beta1";
|
||||
|
||||
// Service that implements Google Cloud Video Intelligence API.
|
||||
service VideoIntelligenceService {
|
||||
option (google.api.default_host) = "videointelligence.googleapis.com";
|
||||
option (google.api.oauth_scopes) =
|
||||
"https://www.googleapis.com/auth/cloud-platform";
|
||||
|
||||
// Performs asynchronous video annotation. Progress and results can be
|
||||
// retrieved through the `google.longrunning.Operations` interface.
|
||||
// `Operation.metadata` contains `AnnotateVideoProgress` (progress).
|
||||
|
|
@ -43,16 +48,25 @@ service VideoIntelligenceService {
|
|||
post: "/v1p3beta1/videos:annotate"
|
||||
body: "*"
|
||||
};
|
||||
option (google.api.method_signature) = "input_uri,features";
|
||||
option (google.longrunning.operation_info) = {
|
||||
response_type: "AnnotateVideoResponse"
|
||||
metadata_type: "AnnotateVideoProgress"
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Service that implements Google Cloud Video Intelligence Streaming API.
|
||||
// Service that implements streaming Google Cloud Video Intelligence API.
|
||||
service StreamingVideoIntelligenceService {
|
||||
option (google.api.default_host) = "videointelligence.googleapis.com";
|
||||
option (google.api.oauth_scopes) =
|
||||
"https://www.googleapis.com/auth/cloud-platform";
|
||||
|
||||
// Performs video annotation with bidirectional streaming: emitting results
|
||||
// while sending video/audio bytes.
|
||||
// This method is only available via the gRPC API (not REST).
|
||||
rpc StreamingAnnotateVideo(stream StreamingAnnotateVideoRequest)
|
||||
returns (stream StreamingAnnotateVideoResponse);
|
||||
returns (stream StreamingAnnotateVideoResponse) {}
|
||||
}
|
||||
|
||||
// Video annotation request.
|
||||
|
|
@ -74,24 +88,24 @@ message AnnotateVideoRequest {
|
|||
// If set, `input_uri` should be unset.
|
||||
bytes input_content = 6;
|
||||
|
||||
// Requested video annotation features.
|
||||
repeated Feature features = 2;
|
||||
// Required. Requested video annotation features.
|
||||
repeated Feature features = 2 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Additional video context and/or feature-specific parameters.
|
||||
VideoContext video_context = 3;
|
||||
|
||||
// Optional location where the output (in JSON format) should be stored.
|
||||
// Optional. Location where the output (in JSON format) should be stored.
|
||||
// Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
|
||||
// URIs are supported, which must be specified in the following format:
|
||||
// `gs://bucket-id/object-id` (other URI formats return
|
||||
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
|
||||
// more information, see [Request URIs](/storage/docs/reference-uris).
|
||||
string output_uri = 4;
|
||||
string output_uri = 4 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional cloud region where annotation should take place. Supported cloud
|
||||
// Optional. Cloud region where annotation should take place. Supported cloud
|
||||
// regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
|
||||
// is specified, a region will be determined based on video file location.
|
||||
string location_id = 5;
|
||||
string location_id = 5 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// Video context and/or feature-specific parameters.
|
||||
|
|
@ -290,8 +304,7 @@ message NormalizedBoundingBox {
|
|||
float bottom = 4;
|
||||
}
|
||||
|
||||
// For tracking related features, such as LOGO_RECOGNITION, FACE_DETECTION,
|
||||
// CELEBRITY_RECOGNITION, PERSON_DETECTION.
|
||||
// For tracking related features.
|
||||
// An object at time_offset with attributes, and located with
|
||||
// normalized_bounding_box.
|
||||
message TimestampedObject {
|
||||
|
|
@ -303,7 +316,8 @@ message TimestampedObject {
|
|||
google.protobuf.Duration time_offset = 2;
|
||||
|
||||
// Optional. The attributes of the object in the bounding box.
|
||||
repeated DetectedAttribute attributes = 3;
|
||||
repeated DetectedAttribute attributes = 3
|
||||
[(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// A track of an object instance.
|
||||
|
|
@ -315,10 +329,11 @@ message Track {
|
|||
repeated TimestampedObject timestamped_objects = 2;
|
||||
|
||||
// Optional. Attributes in the track level.
|
||||
repeated DetectedAttribute attributes = 3;
|
||||
repeated DetectedAttribute attributes = 3
|
||||
[(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. The confidence score of the tracked object.
|
||||
float confidence = 4;
|
||||
float confidence = 4 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// A generic detected attribute represented by name in string format.
|
||||
|
|
@ -335,20 +350,80 @@ message DetectedAttribute {
|
|||
string value = 3;
|
||||
}
|
||||
|
||||
// Celebrity definition: resource name, display name, and optional description.
|
||||
message Celebrity {
|
||||
// The resource name of the celebrity. A name of the form
|
||||
// `video-intelligence/kg-mid` indicates a celebrity from the preloaded
|
||||
// gallery, where kg-mid is the celebrity's unique ID in the Google Knowledge
|
||||
// Graph.
|
||||
string name = 1;
|
||||
|
||||
// The display name of the celebrity.
|
||||
string display_name = 2;
|
||||
|
||||
// Textual description of additional information about the celebrity, if
|
||||
// applicable.
|
||||
string description = 3;
|
||||
}
|
||||
|
||||
// The annotation result of a celebrity face track. The `celebrities` field
|
||||
// may be empty if the face track does not have any matched celebrities.
|
||||
message CelebrityTrack {
|
||||
// A recognized celebrity together with its confidence score.
|
||||
message RecognizedCelebrity {
|
||||
// The recognized celebrity.
|
||||
Celebrity celebrity = 1;
|
||||
|
||||
// Recognition confidence. Range [0, 1].
|
||||
float confidence = 2;
|
||||
}
|
||||
|
||||
// Top N matches of celebrities for the face in this track.
|
||||
repeated RecognizedCelebrity celebrities = 1;
|
||||
|
||||
// A track of a person's face.
|
||||
Track face_track = 3;
|
||||
}
|
||||
|
||||
// Celebrity recognition annotation for a single video.
|
||||
message CelebrityRecognitionAnnotation {
|
||||
// The tracks detected from the input video, including recognized celebrities
|
||||
// and other detected faces in the video (tracks with no matched celebrity).
|
||||
repeated CelebrityTrack celebrity_tracks = 1;
|
||||
}
|
||||
|
||||
// Annotation results for a single video.
|
||||
message VideoAnnotationResults {
|
||||
// Video file location in
|
||||
// [Google Cloud Storage](https://cloud.google.com/storage/).
|
||||
string input_uri = 1;
|
||||
|
||||
// Label annotations on video level or user specified segment level.
|
||||
// Video segment on which the annotation is run.
|
||||
VideoSegment segment = 10;
|
||||
|
||||
// Topical label annotations on video level or user specified segment level.
|
||||
// There is exactly one element for each unique label.
|
||||
repeated LabelAnnotation segment_label_annotations = 2;
|
||||
|
||||
// Label annotations on shot level.
|
||||
// Presence label annotations on video level or user specified segment level.
|
||||
// There is exactly one element for each unique label. Compared to the
|
||||
// existing topical `segment_label_annotations`, this field presents more
|
||||
// fine-grained, segment-level labels detected in video content and is made
|
||||
// available only when the client sets `LabelDetectionConfig.model` to
|
||||
// "builtin/latest" in the request.
|
||||
repeated LabelAnnotation segment_presence_label_annotations = 23;
|
||||
|
||||
// Topical label annotations on shot level.
|
||||
// There is exactly one element for each unique label.
|
||||
repeated LabelAnnotation shot_label_annotations = 3;
|
||||
|
||||
// Presence label annotations on shot level. There is exactly one element for
|
||||
// each unique label. Compared to the existing topical
|
||||
// `shot_label_annotations`, this field presents more fine-grained, shot-level
|
||||
// labels detected in video content and is made available only when the client
|
||||
// sets `LabelDetectionConfig.model` to "builtin/latest" in the request.
|
||||
repeated LabelAnnotation shot_presence_label_annotations = 24;
|
||||
|
||||
// Label annotations on frame level.
|
||||
// There is exactly one element for each unique label.
|
||||
repeated LabelAnnotation frame_label_annotations = 4;
|
||||
|
|
@ -373,6 +448,9 @@ message VideoAnnotationResults {
|
|||
// Annotations for list of logos detected, tracked and recognized in video.
|
||||
repeated LogoRecognitionAnnotation logo_recognition_annotations = 19;
|
||||
|
||||
// Celebrity recognition annotations.
|
||||
CelebrityRecognitionAnnotation celebrity_recognition_annotations = 21;
|
||||
|
||||
// If set, indicates an error. Note that for a single `AnnotateVideoRequest`
|
||||
// some videos may succeed and some may fail.
|
||||
google.rpc.Status error = 9;
|
||||
|
|
@ -401,6 +479,14 @@ message VideoAnnotationProgress {
|
|||
|
||||
// Time of the most recent update.
|
||||
google.protobuf.Timestamp update_time = 4;
|
||||
|
||||
// Specifies which feature is being tracked if the request contains more than
|
||||
// one feature.
|
||||
Feature feature = 5;
|
||||
|
||||
// Specifies which segment is being tracked if the request contains more than
|
||||
// one segment.
|
||||
VideoSegment segment = 6;
|
||||
}
|
||||
|
||||
// Video annotation progress. Included in the `metadata`
|
||||
|
|
@ -413,72 +499,73 @@ message AnnotateVideoProgress {
|
|||
|
||||
// Config for SPEECH_TRANSCRIPTION.
|
||||
message SpeechTranscriptionConfig {
|
||||
// *Required* The language of the supplied audio as a
|
||||
// Required. The language of the supplied audio as a
|
||||
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
|
||||
// Example: "en-US".
|
||||
// See [Language Support](https://cloud.google.com/speech/docs/languages)
|
||||
// for a list of the currently supported language codes.
|
||||
string language_code = 1;
|
||||
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// *Optional* Maximum number of recognition hypotheses to be returned.
|
||||
// Optional. Maximum number of recognition hypotheses to be returned.
|
||||
// Specifically, the maximum number of `SpeechRecognitionAlternative` messages
|
||||
// within each `SpeechTranscription`. The server may return fewer than
|
||||
// `max_alternatives`. Valid values are `0`-`30`. A value of `0` or `1` will
|
||||
// return a maximum of one. If omitted, will return a maximum of one.
|
||||
int32 max_alternatives = 2;
|
||||
int32 max_alternatives = 2 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* If set to `true`, the server will attempt to filter out
|
||||
// Optional. If set to `true`, the server will attempt to filter out
|
||||
// profanities, replacing all but the initial character in each filtered word
|
||||
// with asterisks, e.g. "f***". If set to `false` or omitted, profanities
|
||||
// won't be filtered out.
|
||||
bool filter_profanity = 3;
|
||||
bool filter_profanity = 3 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* A means to provide context to assist the speech recognition.
|
||||
repeated SpeechContext speech_contexts = 4;
|
||||
// Optional. A means to provide context to assist the speech recognition.
|
||||
repeated SpeechContext speech_contexts = 4
|
||||
[(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* If 'true', adds punctuation to recognition result hypotheses.
|
||||
// Optional. If 'true', adds punctuation to recognition result hypotheses.
|
||||
// This feature is only available in select languages. Setting this for
|
||||
// requests in other languages has no effect at all. The default 'false' value
|
||||
// does not add punctuation to result hypotheses. NOTE: "This is currently
|
||||
// offered as an experimental service, complimentary to all users. In the
|
||||
// future this may be exclusively available as a premium feature."
|
||||
bool enable_automatic_punctuation = 5;
|
||||
bool enable_automatic_punctuation = 5
|
||||
[(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* For file formats, such as MXF or MKV, supporting multiple audio
|
||||
// Optional. For file formats, such as MXF or MKV, supporting multiple audio
|
||||
// tracks, specify up to two tracks. Default: track 0.
|
||||
repeated int32 audio_tracks = 6;
|
||||
repeated int32 audio_tracks = 6 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* If 'true', enables speaker detection for each recognized word in
|
||||
// Optional. If 'true', enables speaker detection for each recognized word in
|
||||
// the top alternative of the recognition result using a speaker_tag provided
|
||||
// in the WordInfo.
|
||||
// Note: When this is true, we send all the words from the beginning of the
|
||||
// audio for the top alternative in every consecutive response.
|
||||
// This is done in order to improve our speaker tags as our models learn to
|
||||
// identify the speakers in the conversation over time.
|
||||
bool enable_speaker_diarization = 7;
|
||||
bool enable_speaker_diarization = 7 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional*
|
||||
// If set, specifies the estimated number of speakers in the conversation.
|
||||
// If not set, defaults to '2'.
|
||||
// Ignored unless enable_speaker_diarization is set to true.
|
||||
int32 diarization_speaker_count = 8;
|
||||
// Optional. If set, specifies the estimated number of speakers in the
|
||||
// conversation. If not set, defaults to '2'. Ignored unless
|
||||
// enable_speaker_diarization is set to true.
|
||||
int32 diarization_speaker_count = 8 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// *Optional* If `true`, the top result includes a list of words and the
|
||||
// Optional. If `true`, the top result includes a list of words and the
|
||||
// confidence for those words. If `false`, no word-level confidence
|
||||
// information is returned. The default is `false`.
|
||||
bool enable_word_confidence = 9;
|
||||
bool enable_word_confidence = 9 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// Provides "hints" to the speech recognizer to favor specific words and phrases
|
||||
// in the results.
|
||||
message SpeechContext {
|
||||
// *Optional* A list of strings containing words and phrases "hints" so that
|
||||
// Optional. A list of strings containing words and phrases "hints" so that
|
||||
// the speech recognition is more likely to recognize them. This can be used
|
||||
// to improve the accuracy for specific words and phrases, for example, if
|
||||
// specific commands are typically spoken by the user. This can also be used
|
||||
// to add additional words to the vocabulary of the recognizer. See
|
||||
// [usage limits](https://cloud.google.com/speech/limits#content).
|
||||
repeated string phrases = 1;
|
||||
repeated string phrases = 1 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// A speech recognition result corresponding to a portion of the audio.
|
||||
|
|
@ -489,11 +576,10 @@ message SpeechTranscription {
|
|||
// ranked by the recognizer.
|
||||
repeated SpeechRecognitionAlternative alternatives = 1;
|
||||
|
||||
// Output only. The
|
||||
// [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
|
||||
// language in this result. This language code was detected to have the most
|
||||
// likelihood of being spoken in the audio.
|
||||
string language_code = 2;
|
||||
// Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
|
||||
// language tag of the language in this result. This language code was
|
||||
// detected to have the most likelihood of being spoken in the audio.
|
||||
string language_code = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// Alternative hypotheses (a.k.a. n-best list).
|
||||
|
|
@ -501,16 +587,18 @@ message SpeechRecognitionAlternative {
|
|||
// Transcript text representing the words that the user spoke.
|
||||
string transcript = 1;
|
||||
|
||||
// The confidence estimate between 0.0 and 1.0. A higher number
|
||||
// Output only. The confidence estimate between 0.0 and 1.0. A higher number
|
||||
// indicates an estimated greater likelihood that the recognized words are
|
||||
// correct. This field is typically provided only for the top hypothesis, and
|
||||
// only for `is_final=true` results. Clients should not rely on the
|
||||
// `confidence` field as it is not guaranteed to be accurate or consistent.
|
||||
// correct. This field is set only for the top alternative.
|
||||
// This field is not guaranteed to be accurate and users should not rely on it
|
||||
// to be always provided.
|
||||
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
|
||||
float confidence = 2;
|
||||
float confidence = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// A list of word-specific information for each recognized word.
|
||||
repeated WordInfo words = 3;
|
||||
// Output only. A list of word-specific information for each recognized word.
|
||||
// Note: When `enable_speaker_diarization` is true, you will see all the words
|
||||
// from the beginning of the audio.
|
||||
repeated WordInfo words = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// Word-specific information for recognized words. Word information is only
|
||||
|
|
@ -538,13 +626,13 @@ message WordInfo {
|
|||
// This field is not guaranteed to be accurate and users should not rely on it
|
||||
// to be always provided.
|
||||
// The default of 0.0 is a sentinel value indicating `confidence` was not set.
|
||||
float confidence = 4;
|
||||
float confidence = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// Output only. A distinct integer value is assigned for every speaker within
|
||||
// the audio. This field specifies which one of those speakers was detected to
|
||||
// have spoken this word. Value ranges from 1 up to diarization_speaker_count,
|
||||
// and is only set if speaker diarization is enabled.
|
||||
int32 speaker_tag = 5;
|
||||
int32 speaker_tag = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// A vertex represents a 2D point in the image.
|
||||
|
|
@ -645,6 +733,7 @@ message ObjectTrackingAnnotation {
|
|||
// Non-streaming batch mode ONLY.
|
||||
// Each object track corresponds to one video segment where it appears.
|
||||
VideoSegment segment = 3;
|
||||
|
||||
// Streaming mode ONLY.
|
||||
// In streaming mode, we do not know the end time of a tracked object
|
||||
// before it is completed. Hence, there is no VideoSegment info returned.
|
||||
|
|
@ -712,26 +801,24 @@ message StreamingAnnotateVideoResponse {
|
|||
string annotation_results_uri = 3;
|
||||
}
|
||||
|
||||
// Config for AutoML classification in streaming mode, i.e. the
|
||||
// STREAMING_AUTOML_CLASSIFICATION feature.
|
||||
message StreamingAutomlClassificationConfig {
|
||||
// Resource name of the AutoML model.
|
||||
// Format: `projects/{project_id}/locations/{location_id}/models/{model_id}`
|
||||
string model_name = 1;
|
||||
}
|
||||
|
||||
// Config for AutoML object tracking in streaming mode, i.e. the
|
||||
// STREAMING_AUTOML_OBJECT_TRACKING feature.
|
||||
message StreamingAutomlObjectTrackingConfig {
|
||||
// Resource name of the AutoML model.
|
||||
// Format: `projects/{project_id}/locations/{location_id}/models/{model_id}`
|
||||
string model_name = 1;
|
||||
}
|
||||
|
||||
// Config for EXPLICIT_CONTENT_DETECTION in streaming mode.
|
||||
message StreamingExplicitContentDetectionConfig {
|
||||
// No customized config support.
|
||||
}
|
||||
// Config for STREAMING_EXPLICIT_CONTENT_DETECTION.
|
||||
message StreamingExplicitContentDetectionConfig {}
|
||||
|
||||
// Config for LABEL_DETECTION in streaming mode.
|
||||
// Config for STREAMING_LABEL_DETECTION.
|
||||
message StreamingLabelDetectionConfig {
|
||||
// Whether the video has been captured from a stationary (i.e. non-moving)
|
||||
// camera. When set to true, might improve detection accuracy for moving
|
||||
|
|
@ -740,14 +827,10 @@ message StreamingLabelDetectionConfig {
|
|||
}
|
||||
|
||||
// Config for STREAMING_OBJECT_TRACKING.
|
||||
message StreamingObjectTrackingConfig {
|
||||
// No customized config support.
|
||||
}
|
||||
message StreamingObjectTrackingConfig {}
|
||||
|
||||
// Config for SHOT_CHANGE_DETECTION in streaming mode.
|
||||
message StreamingShotChangeDetectionConfig {
|
||||
// No customized config support.
|
||||
}
|
||||
// Config for STREAMING_SHOT_CHANGE_DETECTION.
|
||||
message StreamingShotChangeDetectionConfig {}
|
||||
|
||||
// Config for streaming storage option.
|
||||
message StreamingStorageConfig {
|
||||
|
|
@ -840,6 +923,9 @@ enum Feature {
|
|||
|
||||
// Logo detection, tracking, and recognition.
|
||||
LOGO_RECOGNITION = 12;
|
||||
|
||||
// Celebrity recognition.
|
||||
CELEBRITY_RECOGNITION = 13;
|
||||
}
|
||||
|
||||
// Label detection mode.
|
||||
|
|
@ -882,16 +968,22 @@ enum Likelihood {
|
|||
enum StreamingFeature {
|
||||
// Unspecified. This is the default (zero) value.
|
||||
STREAMING_FEATURE_UNSPECIFIED = 0;
|
||||
|
||||
// Label detection. Detects objects, such as a dog or a flower.
|
||||
STREAMING_LABEL_DETECTION = 1;
|
||||
|
||||
// Shot change detection.
|
||||
STREAMING_SHOT_CHANGE_DETECTION = 2;
|
||||
|
||||
// Explicit content detection.
|
||||
STREAMING_EXPLICIT_CONTENT_DETECTION = 3;
|
||||
|
||||
// Object detection and tracking.
|
||||
STREAMING_OBJECT_TRACKING = 4;
|
||||
|
||||
// Video classification based on an AutoML model.
|
||||
STREAMING_AUTOML_CLASSIFICATION = 21;
|
||||
|
||||
// Object detection and tracking based on an AutoML model.
|
||||
STREAMING_AUTOML_OBJECT_TRACKING = 22;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue