Synchronize new proto/yaml changes.

PiperOrigin-RevId: 278627678
2019-11-05 08:08:04 -08:00 · 2019-11-05 08:08:04 -08:00 · f06bab1c11
parent 8c6569ced0
commit f06bab1c11
1 changed files with 160 additions and 68 deletions
--- a/google/cloud/videointelligence/v1p3beta1/video_intelligence.proto
+++ b/google/cloud/videointelligence/v1p3beta1/video_intelligence.proto
@ -1,4 +1,4 @@
-// Copyright 2018 Google LLC.
+// Copyright 2019 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@ -18,6 +18,8 @@ syntax = "proto3";
 package google.cloud.videointelligence.v1p3beta1;

 import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
@ -29,10 +31,13 @@ option java_multiple_files = true;
 option java_outer_classname = "VideoIntelligenceServiceProto";
 option java_package = "com.google.cloud.videointelligence.v1p3beta1";
 option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1p3beta1";
-option ruby_package = "Google::Cloud::VideoIntelligence::V1p3beta1";

 // Service that implements Google Cloud Video Intelligence API.
 service VideoIntelligenceService {
+  option (google.api.default_host) = "videointelligence.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
  // Performs asynchronous video annotation. Progress and results can be
  // retrieved through the `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
@ -43,16 +48,25 @@ service VideoIntelligenceService {
      post: "/v1p3beta1/videos:annotate"
      body: "*"
    };
+    option (google.api.method_signature) = "input_uri,features";
+    option (google.longrunning.operation_info) = {
+      response_type: "AnnotateVideoResponse"
+      metadata_type: "AnnotateVideoProgress"
+    };
  }
 }

-// Service that implements Google Cloud Video Intelligence Streaming API.
+// Service that implements the Google Cloud Video Intelligence Streaming API.
 service StreamingVideoIntelligenceService {
+  option (google.api.default_host) = "videointelligence.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
  // Performs video annotation with bidirectional streaming: emitting results
  // while sending video/audio bytes.
  // This method is only available via the gRPC API (not REST).
  rpc StreamingAnnotateVideo(stream StreamingAnnotateVideoRequest)
-      returns (stream StreamingAnnotateVideoResponse);
+      returns (stream StreamingAnnotateVideoResponse) {}
 }

 // Video annotation request.
@ -74,24 +88,24 @@ message AnnotateVideoRequest {
  // If set, `input_uri` should be unset.
  bytes input_content = 6;

-  // Requested video annotation features.
-  repeated Feature features = 2;
+  // Required. Requested video annotation features.
+  repeated Feature features = 2 [(google.api.field_behavior) = REQUIRED];

  // Additional video context and/or feature-specific parameters.
  VideoContext video_context = 3;

-  // Optional location where the output (in JSON format) should be stored.
+  // Optional. Location where the output (in JSON format) should be stored.
  // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
  // URIs are supported, which must be specified in the following format:
  // `gs://bucket-id/object-id` (other URI formats return
  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  // more information, see [Request URIs](/storage/docs/reference-uris).
-  string output_uri = 4;
+  string output_uri = 4 [(google.api.field_behavior) = OPTIONAL];

-  // Optional cloud region where annotation should take place. Supported cloud
+  // Optional. Cloud region where annotation should take place. Supported cloud
  // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no region
  // is specified, a region will be determined based on video file location.
-  string location_id = 5;
+  string location_id = 5 [(google.api.field_behavior) = OPTIONAL];
 }

 // Video context and/or feature-specific parameters.
@ -290,8 +304,7 @@ message NormalizedBoundingBox {
  float bottom = 4;
 }

-// For tracking related features, such as LOGO_RECOGNITION, FACE_DETECTION,
-// CELEBRITY_RECOGNITION, PERSON_DETECTION.
+// For tracking related features.
 // An object at time_offset with attributes, and located with
 // normalized_bounding_box.
 message TimestampedObject {
@ -303,7 +316,8 @@ message TimestampedObject {
  google.protobuf.Duration time_offset = 2;

  // Optional. The attributes of the object in the bounding box.
-  repeated DetectedAttribute attributes = 3;
+  repeated DetectedAttribute attributes = 3
+      [(google.api.field_behavior) = OPTIONAL];
 }

 // A track of an object instance.
@ -315,10 +329,11 @@ message Track {
  repeated TimestampedObject timestamped_objects = 2;

  // Optional. Attributes in the track level.
-  repeated DetectedAttribute attributes = 3;
+  repeated DetectedAttribute attributes = 3
+      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The confidence score of the tracked object.
-  float confidence = 4;
+  float confidence = 4 [(google.api.field_behavior) = OPTIONAL];
 }

 // A generic detected attribute represented by name in string format.
@ -335,20 +350,80 @@ message DetectedAttribute {
  string value = 3;
 }

+// Describes a celebrity: resource name, display name, and description.
+message Celebrity {
+  // The resource name of the celebrity. The format
+  // `video-intelligence/kg-mid` indicates a celebrity from the preloaded
+  // gallery. kg-mid is the id in the Google Knowledge Graph, which is unique
+  // for the celebrity.
+  string name = 1;
+
+  // The display name of the celebrity.
+  string display_name = 2;
+
+  // Textual description of additional information about the celebrity, if
+  // applicable.
+  string description = 3;
+}
+
+// The annotation result of a celebrity face track. The `celebrities` field
+// could be empty if the face track does not have any matched celebrities.
+message CelebrityTrack {
+  // A recognized celebrity together with its confidence score.
+  message RecognizedCelebrity {
+    // The recognized celebrity.
+    Celebrity celebrity = 1;
+
+    // Recognition confidence. Range [0, 1].
+    float confidence = 2;
+  }
+
+  // Top N matches of the celebrities for the face in this track.
+  repeated RecognizedCelebrity celebrities = 1;
+
+  // A track of a person's face.
+  Track face_track = 3;
+}
+
+// Celebrity recognition annotation for a single video.
+message CelebrityRecognitionAnnotation {
+  // The tracks detected in the input video, including both recognized
+  // celebrities and other detected faces in the video.
+  repeated CelebrityTrack celebrity_tracks = 1;
+}
+
 // Annotation results for a single video.
 message VideoAnnotationResults {
  // Video file location in
  // [Google Cloud Storage](https://cloud.google.com/storage/).
  string input_uri = 1;

-  // Label annotations on video level or user specified segment level.
+  // Video segment on which the annotation is run.
+  VideoSegment segment = 10;
+
+  // Topical label annotations on video level or user specified segment level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation segment_label_annotations = 2;

-  // Label annotations on shot level.
+  // Presence label annotations on video level or user specified segment level.
+  // There is exactly one element for each unique label. Compared to the
+  // existing topical `segment_label_annotations`, this field presents more
+  // fine-grained, segment-level labels detected in video content and is made
+  // available only when the client sets `LabelDetectionConfig.model` to
+  // "builtin/latest" in the request.
+  repeated LabelAnnotation segment_presence_label_annotations = 23;
+
+  // Topical label annotations on shot level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation shot_label_annotations = 3;

+  // Presence label annotations on shot level. There is exactly one element for
+  // each unique label. Compared to the existing topical
+  // `shot_label_annotations`, this field presents more fine-grained, shot-level
+  // labels detected in video content and is made available only when the client
+  // sets `LabelDetectionConfig.model` to "builtin/latest" in the request.
+  repeated LabelAnnotation shot_presence_label_annotations = 24;
+
  // Label annotations on frame level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation frame_label_annotations = 4;
@ -373,6 +448,9 @@ message VideoAnnotationResults {
  // Annotations for list of logos detected, tracked and recognized in video.
  repeated LogoRecognitionAnnotation logo_recognition_annotations = 19;

+  // Celebrity recognition annotations.
+  CelebrityRecognitionAnnotation celebrity_recognition_annotations = 21;
+
  // If set, indicates an error. Note that for a single `AnnotateVideoRequest`
  // some videos may succeed and some may fail.
  google.rpc.Status error = 9;
@ -401,6 +479,14 @@ message VideoAnnotationProgress {

  // Time of the most recent update.
  google.protobuf.Timestamp update_time = 4;
+
+  // Specifies which feature is being tracked if the request contains more than
+  // one feature.
+  Feature feature = 5;
+
+  // Specifies which segment is being tracked if the request contains more than
+  // one segment.
+  VideoSegment segment = 6;
 }

 // Video annotation progress. Included in the `metadata`
@ -413,72 +499,73 @@ message AnnotateVideoProgress {

 // Config for SPEECH_TRANSCRIPTION.
 message SpeechTranscriptionConfig {
-  // *Required* The language of the supplied audio as a
+  // Required. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  // Example: "en-US".
  // See [Language Support](https://cloud.google.com/speech/docs/languages)
  // for a list of the currently supported language codes.
-  string language_code = 1;
+  string language_code = 1 [(google.api.field_behavior) = REQUIRED];

-  // *Optional* Maximum number of recognition hypotheses to be returned.
+  // Optional. Maximum number of recognition hypotheses to be returned.
  // Specifically, the maximum number of `SpeechRecognitionAlternative` messages
  // within each `SpeechTranscription`. The server may return fewer than
  // `max_alternatives`. Valid values are `0`-`30`. A value of `0` or `1` will
  // return a maximum of one. If omitted, will return a maximum of one.
-  int32 max_alternatives = 2;
+  int32 max_alternatives = 2 [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* If set to `true`, the server will attempt to filter out
+  // Optional. If set to `true`, the server will attempt to filter out
  // profanities, replacing all but the initial character in each filtered word
  // with asterisks, e.g. "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
-  bool filter_profanity = 3;
+  bool filter_profanity = 3 [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* A means to provide context to assist the speech recognition.
-  repeated SpeechContext speech_contexts = 4;
+  // Optional. A means to provide context to assist the speech recognition.
+  repeated SpeechContext speech_contexts = 4
+      [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* If 'true', adds punctuation to recognition result hypotheses.
+  // Optional. If 'true', adds punctuation to recognition result hypotheses.
  // This feature is only available in select languages. Setting this for
  // requests in other languages has no effect at all. The default 'false' value
  // does not add punctuation to result hypotheses. NOTE: "This is currently
  // offered as an experimental service, complimentary to all users. In the
  // future this may be exclusively available as a premium feature."
-  bool enable_automatic_punctuation = 5;
+  bool enable_automatic_punctuation = 5
+      [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* For file formats, such as MXF or MKV, supporting multiple audio
+  // Optional. For file formats, such as MXF or MKV, supporting multiple audio
  // tracks, specify up to two tracks. Default: track 0.
-  repeated int32 audio_tracks = 6;
+  repeated int32 audio_tracks = 6 [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* If 'true', enables speaker detection for each recognized word in
+  // Optional. If 'true', enables speaker detection for each recognized word in
  // the top alternative of the recognition result using a speaker_tag provided
  // in the WordInfo.
  // Note: When this is true, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive responses.
  // This is done in order to improve our speaker tags as our models learn to
  // identify the speakers in the conversation over time.
-  bool enable_speaker_diarization = 7;
+  bool enable_speaker_diarization = 7 [(google.api.field_behavior) = OPTIONAL];

-  // *Optional*
-  // If set, specifies the estimated number of speakers in the conversation.
-  // If not set, defaults to '2'.
-  // Ignored unless enable_speaker_diarization is set to true.
-  int32 diarization_speaker_count = 8;
+  // Optional. If set, specifies the estimated number of speakers in the
+  // conversation. If not set, defaults to '2'. Ignored unless
+  // enable_speaker_diarization is set to true.
+  int32 diarization_speaker_count = 8 [(google.api.field_behavior) = OPTIONAL];

-  // *Optional* If `true`, the top result includes a list of words and the
+  // Optional. If `true`, the top result includes a list of words and the
  // confidence for those words. If `false`, no word-level confidence
  // information is returned. The default is `false`.
-  bool enable_word_confidence = 9;
+  bool enable_word_confidence = 9 [(google.api.field_behavior) = OPTIONAL];
 }

 // Provides "hints" to the speech recognizer to favor specific words and phrases
 // in the results.
 message SpeechContext {
-  // *Optional* A list of strings containing words and phrases "hints" so that
+  // Optional. A list of strings containing words and phrases "hints" so that
  // the speech recognition is more likely to recognize them. This can be used
  // to improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user. This can also be used
  // to add additional words to the vocabulary of the recognizer. See
  // [usage limits](https://cloud.google.com/speech/limits#content).
-  repeated string phrases = 1;
+  repeated string phrases = 1 [(google.api.field_behavior) = OPTIONAL];
 }

 // A speech recognition result corresponding to a portion of the audio.
@ -489,11 +576,10 @@ message SpeechTranscription {
  // ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

-  // Output only. The
-  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of the
-  // language in this result. This language code was detected to have the most
-  // likelihood of being spoken in the audio.
-  string language_code = 2;
+  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
+  // language tag of the language in this result. This language code was
+  // detected to have the highest likelihood of being spoken in the audio.
+  string language_code = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
 }

 // Alternative hypotheses (a.k.a. n-best list).
@ -501,16 +587,18 @@ message SpeechRecognitionAlternative {
  // Transcript text representing the words that the user spoke.
  string transcript = 1;

-  // The confidence estimate between 0.0 and 1.0. A higher number
+  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
-  // correct. This field is typically provided only for the top hypothesis, and
-  // only for `is_final=true` results. Clients should not rely on the
-  // `confidence` field as it is not guaranteed to be accurate or consistent.
+  // correct. This field is set only for the top alternative.
+  // This field is not guaranteed to be accurate and users should not rely on it
+  // to be always provided.
  // The default of 0.0 is a sentinel value indicating `confidence` was not set.
-  float confidence = 2;
+  float confidence = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

-  // A list of word-specific information for each recognized word.
-  repeated WordInfo words = 3;
+  // Output only. A list of word-specific information for each recognized word.
+  // Note: When `enable_speaker_diarization` is true, you will see all the words
+  // from the beginning of the audio.
+  repeated WordInfo words = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
 }

 // Word-specific information for recognized words. Word information is only
@ -538,13 +626,13 @@ message WordInfo {
  // This field is not guaranteed to be accurate and users should not rely on it
  // to be always provided.
  // The default of 0.0 is a sentinel value indicating `confidence` was not set.
-  float confidence = 4;
+  float confidence = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A distinct integer value is assigned for every speaker within
  // the audio. This field specifies which one of those speakers was detected to
  // have spoken this word. Value ranges from 1 up to diarization_speaker_count,
  // and is only set if speaker diarization is enabled.
-  int32 speaker_tag = 5;
+  int32 speaker_tag = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
 }

 // A vertex represents a 2D point in the image.
@ -645,6 +733,7 @@ message ObjectTrackingAnnotation {
    // Non-streaming batch mode ONLY.
    // Each object track corresponds to one video segment where it appears.
    VideoSegment segment = 3;
+
    // Streaming mode ONLY.
    // In streaming mode, we do not know the end time of a tracked object
    // before it is completed. Hence, there is no VideoSegment info returned.
@ -712,26 +801,24 @@ message StreamingAnnotateVideoResponse {
  string annotation_results_uri = 3;
 }

-// Config for AUTOML_CLASSIFICATION in streaming mode.
+// Config for STREAMING_AUTOML_CLASSIFICATION.
 message StreamingAutomlClassificationConfig {
  // Resource name of AutoML model.
  // Format: `projects/{project_id}/locations/{location_id}/models/{model_id}`
  string model_name = 1;
 }

-// Config for AUTOML_OBJECT_TRACKING in streaming mode.
+// Config for STREAMING_AUTOML_OBJECT_TRACKING.
 message StreamingAutomlObjectTrackingConfig {
  // Resource name of AutoML model.
  // Format: `projects/{project_id}/locations/{location_id}/models/{model_id}`
  string model_name = 1;
 }

-// Config for EXPLICIT_CONTENT_DETECTION in streaming mode.
-message StreamingExplicitContentDetectionConfig {
-  // No customized config support.
-}
+// Config for STREAMING_EXPLICIT_CONTENT_DETECTION.
+message StreamingExplicitContentDetectionConfig {}

-// Config for LABEL_DETECTION in streaming mode.
+// Config for STREAMING_LABEL_DETECTION.
 message StreamingLabelDetectionConfig {
  // Whether the video has been captured from a stationary (i.e. non-moving)
  // camera. When set to true, might improve detection accuracy for moving
@ -740,14 +827,10 @@ message StreamingLabelDetectionConfig {
 }

 // Config for STREAMING_OBJECT_TRACKING.
-message StreamingObjectTrackingConfig {
-  // No customized config support.
-}
+message StreamingObjectTrackingConfig {}

-// Config for SHOT_CHANGE_DETECTION in streaming mode.
-message StreamingShotChangeDetectionConfig {
-  // No customized config support.
-}
+// Config for STREAMING_SHOT_CHANGE_DETECTION.
+message StreamingShotChangeDetectionConfig {}

 // Config for streaming storage option.
 message StreamingStorageConfig {
@ -840,6 +923,9 @@ enum Feature {

  // Logo detection, tracking, and recognition.
  LOGO_RECOGNITION = 12;
+
+  // Celebrity recognition.
+  CELEBRITY_RECOGNITION = 13;
 }

 // Label detection mode.
@ -882,16 +968,22 @@ enum Likelihood {
 enum StreamingFeature {
  // Unspecified.
  STREAMING_FEATURE_UNSPECIFIED = 0;
+
  // Label detection. Detect objects, such as dog or flower.
  STREAMING_LABEL_DETECTION = 1;
+
  // Shot change detection.
  STREAMING_SHOT_CHANGE_DETECTION = 2;
+
  // Explicit content detection.
  STREAMING_EXPLICIT_CONTENT_DETECTION = 3;
+
  // Object detection and tracking.
  STREAMING_OBJECT_TRACKING = 4;
+
  // Video classification based on AutoML model.
  STREAMING_AUTOML_CLASSIFICATION = 21;
+
  // Object detection and tracking based on AutoML model.
  STREAMING_AUTOML_OBJECT_TRACKING = 22;
 }