feat: add media translation v1alpha1 API

PiperOrigin-RevId: 328476173
2020-08-25 23:29:03 -07:00 · 2020-08-25 23:29:03 -07:00 · 1a85976e6b
parent 1a97126c6c
commit 1a85976e6b
5 changed files with 491 additions and 0 deletions
--- a/google/cloud/mediatranslation/v1alpha1/BUILD.bazel
+++ b/google/cloud/mediatranslation/v1alpha1/BUILD.bazel
@ -0,0 +1,167 @@
+# This file was automatically generated by BuildFileGenerator
+
+# This is an API workspace, having public visibility by default makes perfect sense.
+package(default_visibility = ["//visibility:public"])
+
+##############################################################################
+# Common
+##############################################################################
+load("@rules_proto//proto:defs.bzl", "proto_library")
+
+proto_library(
+    name = "mediatranslation_proto",
+    srcs = [
+        "media_translation.proto",
+    ],
+    deps = [
+        "//google/api:client_proto",
+        "//google/api:field_behavior_proto",
+        "//google/rpc:status_proto",
+    ],
+)
+
+##############################################################################
+# Java
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "java_grpc_library",
+    "java_proto_library",
+)
+
+java_proto_library(
+    name = "mediatranslation_java_proto",
+    deps = [":mediatranslation_proto"],
+)
+
+java_grpc_library(
+    name = "mediatranslation_java_grpc",
+    srcs = [":mediatranslation_proto"],
+    deps = [":mediatranslation_java_proto"],
+)
+
+##############################################################################
+# Go
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "go_proto_library",
+)
+
+go_proto_library(
+    name = "mediatranslation_go_proto",
+    compilers = ["@io_bazel_rules_go//proto:go_grpc"],
+    importpath = "google.golang.org/genproto/googleapis/cloud/mediatranslation/v1alpha1",
+    protos = [":mediatranslation_proto"],
+    deps = [
+        "//google/api:annotations_go_proto",
+        "//google/rpc:status_go_proto",
+    ],
+)
+
+##############################################################################
+# Python
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "moved_proto_library",
+    "py_grpc_library",
+    "py_proto_library",
+)
+
+moved_proto_library(
+    name = "mediatranslation_moved_proto",
+    srcs = [":mediatranslation_proto"],
+    deps = [
+        "//google/api:client_proto",
+        "//google/api:field_behavior_proto",
+        "//google/rpc:status_proto",
+    ],
+)
+
+py_proto_library(
+    name = "mediatranslation_py_proto",
+    plugin = "@protoc_docs_plugin//:docs_plugin",
+    deps = [":mediatranslation_moved_proto"],
+)
+
+py_grpc_library(
+    name = "mediatranslation_py_grpc",
+    srcs = [":mediatranslation_moved_proto"],
+    deps = [":mediatranslation_py_proto"],
+)
+
+##############################################################################
+# PHP
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "php_grpc_library",
+    "php_proto_library",
+)
+
+php_proto_library(
+    name = "mediatranslation_php_proto",
+    deps = [":mediatranslation_proto"],
+)
+
+php_grpc_library(
+    name = "mediatranslation_php_grpc",
+    srcs = [":mediatranslation_proto"],
+    deps = [":mediatranslation_php_proto"],
+)
+
+##############################################################################
+# Node.js
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "nodejs_gapic_assembly_pkg",
+    "nodejs_gapic_library",
+)
+
+
+##############################################################################
+# Ruby
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "ruby_grpc_library",
+    "ruby_proto_library",
+)
+
+ruby_proto_library(
+    name = "mediatranslation_ruby_proto",
+    deps = [":mediatranslation_proto"],
+)
+
+ruby_grpc_library(
+    name = "mediatranslation_ruby_grpc",
+    srcs = [":mediatranslation_proto"],
+    deps = [":mediatranslation_ruby_proto"],
+)
+
+##############################################################################
+# C#
+##############################################################################
+load(
+    "@com_google_googleapis_imports//:imports.bzl",
+    "csharp_grpc_library",
+    "csharp_proto_library",
+)
+
+csharp_proto_library(
+    name = "mediatranslation_csharp_proto",
+    deps = [":mediatranslation_proto"],
+)
+
+csharp_grpc_library(
+    name = "mediatranslation_csharp_grpc",
+    srcs = [":mediatranslation_proto"],
+    deps = [":mediatranslation_csharp_proto"],
+)
+
+##############################################################################
+# C++
+##############################################################################
+# Put your C++ code here
--- a/google/cloud/mediatranslation/v1alpha1/media_translation.proto
+++ b/google/cloud/mediatranslation/v1alpha1/media_translation.proto
@ -0,0 +1,275 @@
+// Copyright 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.mediatranslation.v1alpha1;
+
+import "google/api/field_behavior.proto";
+import "google/rpc/status.proto";
+import "google/api/client.proto";
+
+option cc_enable_arenas = true;
+option go_package = "google.golang.org/genproto/googleapis/cloud/mediatranslation/v1alpha1;mediatranslation";
+option java_package = "com.google.cloud.mediatranslation.v1alpha1";
+
+// Provides translation from/to media types.
+service SpeechTranslationService {
+  option (google.api.default_host) = "mediatranslation.googleapis.com";
+  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
+
+  // Performs bidirectional streaming speech translation: receive results while
+  // sending audio. This method is only available via the gRPC API (not REST).
+  rpc StreamingTranslateSpeech(stream StreamingTranslateSpeechRequest) returns (stream StreamingTranslateSpeechResponse) {
+  }
+}
+
+// Provides information to the speech translation that specifies how to process
+// the request.
+message TranslateSpeechConfig {
+  // Required. Encoding of audio data.
+  // Supported formats:
+  //
+  // - `linear16`
+  //
+  //   Uncompressed 16-bit signed little-endian samples (Linear PCM).
+  //
+  // - `flac`
+  //
+  //   `flac` (Free Lossless Audio Codec) is the recommended encoding
+  //   because it is lossless--therefore recognition is not compromised--and
+  //   requires only about half the bandwidth of `linear16`.
+  //
+  // - `mulaw`
+  //
+  //   8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
+  //
+  // - `amr`
+  //
+  //   Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
+  //
+  // - `amr-wb`
+  //
+  //   Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
+  //
+  // - `ogg-opus`
+  //
+  //   Opus encoded audio frames in Ogg container
+  //   ([OggOpus](https://wiki.xiph.org/OggOpus)).
+  //   `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
+  //
+  // - `mp3`
+  //
+  //   MP3 audio. Supports all standard MP3 bitrates (which range from 32-320
+  //   kbps). When using this encoding, `sample_rate_hertz` has to match the
+  //   sample rate of the file being used.
+  //
+  //
+  string audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Source language code (BCP-47) of the input audio.
+  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. Target language code (BCP-47) of the output.
+  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. A list of up to 3 additional language codes (BCP-47), listing possible
+  // alternative languages of the supplied audio. If alternative source
+  // languages are listed, speech translation result will translate in the most
+  // likely language detected including the main source_language_code. The
+  // translated result will include the language code of the language detected
+  // in the audio.
+  // Note:
+  // 1. If the provided alternative_source_language_code is not supported
+  // by current API version, we will skip that language code.
+  // 2. If user only provided one eligible alternative_source_language_codes,
+  // the translation will happen between source_language_code and
+  // alternative_source_language_codes. The target_language_code will be
+  // ignored. It will be useful in conversation mode.
+  repeated string alternative_source_language_codes = 6 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Sample rate in Hertz of the audio data. Valid values are:
+  // 8000-48000. 16000 is optimal. For best results, set the sampling rate of
+  // the audio source to 16000 Hz. If that's not possible, use the native sample
+  // rate of the audio source (instead of re-sampling).
+  //
+  int32 sample_rate_hertz = 4 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional.
+  string model = 5 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Config used for streaming translation.
+message StreamingTranslateSpeechConfig {
+  // Required. The common config for all the following audio contents.
+  TranslateSpeechConfig audio_config = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. If `false` or omitted, the system performs
+  // continuous translation (continuing to wait for and process audio even if
+  // the user pauses speaking) until the client closes the input stream (gRPC
+  // API) or until the maximum time limit has been reached. May return multiple
+  // `StreamingTranslateSpeechResult`s with the `is_final` flag set to `true`.
+  //
+  // If `true`, the speech translator will detect a single spoken utterance.
+  // When it detects that the user has paused or stopped speaking, it will
+  // return an `END_OF_SINGLE_UTTERANCE` event and cease translation.
+  // When the client receives `END_OF_SINGLE_UTTERANCE` event, the client should
+  // stop sending the requests. However, clients should keep receiving remaining
+  // responses until the stream is terminated. To construct the complete
+  // sentence in a streaming way, one should override (if `is_final` of previous
+  // response is `false`), or append (if `is_final` of previous response is `true`).
+  bool single_utterance = 2 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Stability control for the media translation text. The value should be
+  // one of "LOW", "MEDIUM", or "HIGH". It applies to text/text_and_audio
+  // translation only.
+  // For audio translation mode, we only support HIGH stability mode,
+  // low/medium stability mode will throw argument error.
+  // Default empty string will be treated as "HIGH" in audio translation mode;
+  // will be treated as "LOW" in other translation mode.
+  // Note that there is a trade-off between stability and speed.
+  // 1. "LOW": In low mode, translation service will start to do translation
+  // right after getting recognition response. The speed will be faster.
+  // 2. "MEDIUM": In medium mode, translation service will
+  // check if the recognition response is stable enough or not, and only
+  // translate recognition response which is not likely to be changed later.
+  // 3. "HIGH": In high mode, translation service will wait for more stable
+  // recognition responses, and then start to do translation. Also, the
+  // following recognition responses cannot modify previous recognition
+  // responses. Thus it may impact quality in some situation. "HIGH" stability
+  // will generate "final" responses more frequently.
+  //
+  string stability = 3 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Translation mode, the value should be "text", "audio", "text_and_audio".
+  // Default empty string will be treated as "text".
+  // 1. "text": The response will be text translation. Text translation has a
+  // field "is_final". Detailed definition can be found in
+  // `TextTranslationResult`.
+  // 2. "audio": The response will be audio translation. Audio translation does
+  // not have "is_final" field, which means each audio translation response is
+  // stable and will not be changed by later response.
+  // Translation mode "audio" can only be used with "high" stability mode.
+  // 3. "text_and_audio": The response will have a text translation, when
+  // "is_final" is true, we will also output its corresponding audio
+  // translation. When "is_final" is false, audio_translation field will be
+  // empty.
+  string translation_mode = 4 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. If disable_interim_results is true, we will only return "final" responses.
+  // Otherwise, we will return all the responses. Default value will be false.
+  // User can only set disable_interim_results to be true with "high" stability
+  // mode.
+  bool disable_interim_results = 5 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// The top-level message sent by the client for the `StreamingTranslateSpeech`
+// method. Multiple `StreamingTranslateSpeechRequest` messages are sent. The
+// first message must contain a `streaming_config` message and must not contain
+// `audio_content` data. All subsequent messages must contain `audio_content`
+// data and must not contain a `streaming_config` message.
+message StreamingTranslateSpeechRequest {
+  // The streaming request, which is either a streaming config or content.
+  oneof streaming_request {
+    // Provides information to the recognizer that specifies how to process the
+    // request. The first `StreamingTranslateSpeechRequest` message must contain
+    // a `streaming_config` message.
+    StreamingTranslateSpeechConfig streaming_config = 1;
+
+    // The audio data to be translated. Sequential chunks of audio data are sent
+    // in sequential `StreamingTranslateSpeechRequest` messages. The first
+    // `StreamingTranslateSpeechRequest` message must not contain
+    // `audio_content` data and all subsequent `StreamingTranslateSpeechRequest`
+    // messages must contain `audio_content` data. The audio bytes must be
+    // encoded as specified in `StreamingTranslateSpeechConfig`. Note: as with
+    // all bytes fields, protocol buffers use a pure binary representation (not
+    // base64).
+    bytes audio_content = 2;
+  }
+}
+
+// A streaming speech translation result corresponding to a portion of the audio
+// that is currently being processed.
+message StreamingTranslateSpeechResult {
+  // Text translation result.
+  message TextTranslationResult {
+    // Output only. The translated sentence.
+    string translation = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+    // Output only. If `false`, this `StreamingTranslateSpeechResult` represents
+    // an interim result that may change. If `true`, this is the final time the
+    // translation service will return this particular
+    // `StreamingTranslateSpeechResult`, the streaming translator will not
+    // return any further hypotheses for this portion of the transcript and
+    // corresponding audio.
+    bool is_final = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+  }
+
+  // Audio translation result.
+  message AudioTranslationResult {
+    // Output only. The translated audio.
+    bytes audio_translation = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+  }
+
+  // Text translation result.
+  TextTranslationResult text_translation_result = 1;
+
+  // Audio translation result.
+  AudioTranslationResult audio_translation_result = 2;
+
+  // Output only. The debug-only recognition result in the original language.
+  // This field will be set to an empty string if it is not available.
+  // This is an implementation detail and will not be backward compatible.
+  string recognition_result = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Language code (BCP-47) of the language detected in the audio.
+  string detected_source_language_code = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
+
+// A streaming speech translation response corresponding to a portion of
+// the audio currently processed.
+message StreamingTranslateSpeechResponse {
+  // Indicates the type of speech event.
+  enum SpeechEventType {
+    // No speech event specified.
+    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;
+
+    // This event indicates that the server has detected the end of the user's
+    // speech utterance and expects no additional speech. Therefore, the server
+    // will not process additional audio (although it may subsequently return
+    // additional results). When the client receives `END_OF_SINGLE_UTTERANCE`
+    // event, the client should stop sending the requests. However, clients
+    // should keep receiving remaining responses until the stream is terminated.
+    // To construct the complete sentence in a streaming way, one should
+    // override (if `is_final` of previous response is `false`), or append (if
+    // `is_final` of previous response is `true`). This event is only sent if
+    // `single_utterance` was set to `true`, and is not used otherwise.
+    END_OF_SINGLE_UTTERANCE = 1;
+  }
+
+  // Output only. If set, returns a [google.rpc.Status][google.rpc.Status] message that
+  // specifies the error for the operation.
+  google.rpc.Status error = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. The translation result that is currently being processed.
+  // For text translation, `is_final` could be `true` or `false`.
+  // For audio translation, there is no `is_final` field, which means each
+  // audio response is stable and will not get changed later. For
+  // text_and_audio, the text translation still has an `is_final` field, but
+  // the corresponding audio is only output when `is_final` is `true`.
+  StreamingTranslateSpeechResult result = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Output only. Indicates the type of speech event.
+  SpeechEventType speech_event_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
+}
--- a/google/cloud/mediatranslation/v1alpha1/mediatranslation_grpc_service_config.json
+++ b/google/cloud/mediatranslation/v1alpha1/mediatranslation_grpc_service_config.json
@ -0,0 +1,19 @@
+{
+  "methodConfig": [{
+    "name": [{ "service": "google.cloud.mediatranslation.v1alpha1.SpeechTranslationService" }],
+    "timeout": "400s",
+    "retryPolicy": {
+      "maxAttempts": 5,
+      "initialBackoff": "1s",
+      "maxBackoff": "60s",
+      "backoffMultiplier": 1.3,
+      "retryableStatusCodes": ["UNAVAILABLE", "UNKNOWN", "DEADLINE_EXCEEDED"]
+    }
+  },
+  {
+    "name": [
+      { "service": "google.cloud.mediatranslation.v1alpha1.SpeechTranslationService", "method": "StreamingTranslateSpeech" }
+    ],
+    "timeout": "400s"
+  }]
+}
--- a/google/cloud/mediatranslation/v1alpha1/mediatranslation_v1alpha1.yaml
+++ b/google/cloud/mediatranslation/v1alpha1/mediatranslation_v1alpha1.yaml
@ -0,0 +1,19 @@
+type: google.api.Service
+config_version: 3
+name: mediatranslation.googleapis.com
+title: Media Translation API
+
+apis:
+- name: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService
+
+backend:
+  rules:
+  - selector: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService.StreamingTranslateSpeech
+    deadline: 355.0
+
+authentication:
+  rules:
+  - selector: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService.StreamingTranslateSpeech
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
--- a/google/cloud/mediatranslation/v1beta1/BUILD.bazel
+++ b/google/cloud/mediatranslation/v1beta1/BUILD.bazel
@ -1,4 +1,13 @@
 # This file was automatically generated by BuildFileGenerator
+# https://github.com/googleapis/gapic-generator/tree/master/rules_gapic/bazel
+
+# Most of the manual changes to this file will be overwritten.
+# It's **only** allowed to change the following rule attribute values:
+# - names of *_gapic_assembly_* rules
+# - certain parameters of *_gapic_library rules, including but not limited to:
+#    * extra_protoc_parameters
+#    * extra_protoc_file_parameters
+# The complete list of preserved parameters can be found in the source code.

 # This is an API workspace, having public visibility by default makes perfect sense.
 package(default_visibility = ["//visibility:public"])
@ -291,6 +300,7 @@ ruby_gapic_library(
    name = "mediatranslation_ruby_gapic",
    src = ":mediatranslation_proto_with_info",
    gapic_yaml = "mediatranslation_gapic.yaml",
+    grpc_service_config = "mediatranslation_grpc_service_config.json",
    package = "google.cloud.mediatranslation.v1beta1",
    service_yaml = "mediatranslation_v1beta1.yaml",
    deps = [
@ -335,6 +345,7 @@ csharp_gapic_library(
    name = "mediatranslation_csharp_gapic",
    src = ":mediatranslation_proto_with_info",
    gapic_yaml = "mediatranslation_gapic.yaml",
+    grpc_service_config = "mediatranslation_grpc_service_config.json",
    package = "google.cloud.mediatranslation.v1beta1",
    service_yaml = "mediatranslation_v1beta1.yaml",
    deps = [