feat: add media translation v1alpha1 API
PiperOrigin-RevId: 328476173
This commit is contained in:
parent
1a97126c6c
commit
1a85976e6b
|
|
@ -0,0 +1,167 @@
|
|||
# This file was automatically generated by BuildFileGenerator
|
||||
|
||||
# This is an API workspace, having public visibility by default makes perfect sense.
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
||||
##############################################################################
|
||||
# Common
|
||||
##############################################################################
|
||||
load("@rules_proto//proto:defs.bzl", "proto_library")
|
||||
|
||||
proto_library(
|
||||
name = "mediatranslation_proto",
|
||||
srcs = [
|
||||
"media_translation.proto",
|
||||
],
|
||||
deps = [
|
||||
"//google/api:client_proto",
|
||||
"//google/api:field_behavior_proto",
|
||||
"//google/rpc:status_proto",
|
||||
],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# Java
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"java_grpc_library",
|
||||
"java_proto_library",
|
||||
)
|
||||
|
||||
java_proto_library(
|
||||
name = "mediatranslation_java_proto",
|
||||
deps = [":mediatranslation_proto"],
|
||||
)
|
||||
|
||||
java_grpc_library(
|
||||
name = "mediatranslation_java_grpc",
|
||||
srcs = [":mediatranslation_proto"],
|
||||
deps = [":mediatranslation_java_proto"],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# Go
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"go_proto_library",
|
||||
)
|
||||
|
||||
go_proto_library(
|
||||
name = "mediatranslation_go_proto",
|
||||
compilers = ["@io_bazel_rules_go//proto:go_grpc"],
|
||||
importpath = "google.golang.org/genproto/googleapis/cloud/mediatranslation/v1alpha1",
|
||||
protos = [":mediatranslation_proto"],
|
||||
deps = [
|
||||
"//google/api:annotations_go_proto",
|
||||
"//google/rpc:status_go_proto",
|
||||
],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# Python
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"moved_proto_library",
|
||||
"py_grpc_library",
|
||||
"py_proto_library",
|
||||
)
|
||||
|
||||
moved_proto_library(
|
||||
name = "mediatranslation_moved_proto",
|
||||
srcs = [":mediatranslation_proto"],
|
||||
deps = [
|
||||
"//google/api:client_proto",
|
||||
"//google/api:field_behavior_proto",
|
||||
"//google/rpc:status_proto",
|
||||
],
|
||||
)
|
||||
|
||||
py_proto_library(
|
||||
name = "mediatranslation_py_proto",
|
||||
plugin = "@protoc_docs_plugin//:docs_plugin",
|
||||
deps = [":mediatranslation_moved_proto"],
|
||||
)
|
||||
|
||||
py_grpc_library(
|
||||
name = "mediatranslation_py_grpc",
|
||||
srcs = [":mediatranslation_moved_proto"],
|
||||
deps = [":mediatranslation_py_proto"],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# PHP
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"php_grpc_library",
|
||||
"php_proto_library",
|
||||
)
|
||||
|
||||
php_proto_library(
|
||||
name = "mediatranslation_php_proto",
|
||||
deps = [":mediatranslation_proto"],
|
||||
)
|
||||
|
||||
php_grpc_library(
|
||||
name = "mediatranslation_php_grpc",
|
||||
srcs = [":mediatranslation_proto"],
|
||||
deps = [":mediatranslation_php_proto"],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# Node.js
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"nodejs_gapic_assembly_pkg",
|
||||
"nodejs_gapic_library",
|
||||
)
|
||||
|
||||
|
||||
##############################################################################
|
||||
# Ruby
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"ruby_grpc_library",
|
||||
"ruby_proto_library",
|
||||
)
|
||||
|
||||
ruby_proto_library(
|
||||
name = "mediatranslation_ruby_proto",
|
||||
deps = [":mediatranslation_proto"],
|
||||
)
|
||||
|
||||
ruby_grpc_library(
|
||||
name = "mediatranslation_ruby_grpc",
|
||||
srcs = [":mediatranslation_proto"],
|
||||
deps = [":mediatranslation_ruby_proto"],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# C#
|
||||
##############################################################################
|
||||
load(
|
||||
"@com_google_googleapis_imports//:imports.bzl",
|
||||
"csharp_grpc_library",
|
||||
"csharp_proto_library",
|
||||
)
|
||||
|
||||
csharp_proto_library(
|
||||
name = "mediatranslation_csharp_proto",
|
||||
deps = [":mediatranslation_proto"],
|
||||
)
|
||||
|
||||
csharp_grpc_library(
|
||||
name = "mediatranslation_csharp_grpc",
|
||||
srcs = [":mediatranslation_proto"],
|
||||
deps = [":mediatranslation_csharp_proto"],
|
||||
)
|
||||
|
||||
##############################################################################
|
||||
# C++
|
||||
##############################################################################
|
||||
# Put your C++ code here
|
||||
|
|
@ -0,0 +1,275 @@
|
|||
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.mediatranslation.v1alpha1;
|
||||
|
||||
import "google/api/field_behavior.proto";
|
||||
import "google/rpc/status.proto";
|
||||
import "google/api/client.proto";
|
||||
|
||||
option cc_enable_arenas = true;
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/mediatranslation/v1alpha1;mediatranslation";
|
||||
option java_package = "com.google.cloud.mediatranslation.v1alpha1";
|
||||
|
||||
// Provides translation from/to media types.
|
||||
service SpeechTranslationService {
|
||||
option (google.api.default_host) = "mediatranslation.googleapis.com";
|
||||
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
|
||||
|
||||
// Performs bidirectional streaming speech translation: receive results while
|
||||
// sending audio. This method is only available via the gRPC API (not REST).
|
||||
rpc StreamingTranslateSpeech(stream StreamingTranslateSpeechRequest) returns (stream StreamingTranslateSpeechResponse) {
|
||||
}
|
||||
}
|
||||
|
||||
// Provides information to the speech translation that specifies how to process
|
||||
// the request.
|
||||
message TranslateSpeechConfig {
|
||||
// Required. Encoding of audio data.
|
||||
// Supported formats:
|
||||
//
|
||||
// - `linear16`
|
||||
//
|
||||
// Uncompressed 16-bit signed little-endian samples (Linear PCM).
|
||||
//
|
||||
// - `flac`
|
||||
//
|
||||
// `flac` (Free Lossless Audio Codec) is the recommended encoding
|
||||
// because it is lossless--therefore recognition is not compromised--and
|
||||
// requires only about half the bandwidth of `linear16`.
|
||||
//
|
||||
// - `mulaw`
|
||||
//
|
||||
// 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
|
||||
//
|
||||
// - `amr`
|
||||
//
|
||||
// Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
|
||||
//
|
||||
// - `amr-wb`
|
||||
//
|
||||
// Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
|
||||
//
|
||||
// - `ogg-opus`
|
||||
//
|
||||
// Opus encoded audio frames in Ogg container
|
||||
// ([OggOpus](https://wiki.xiph.org/OggOpus)).
|
||||
// `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
|
||||
//
|
||||
// - `mp3`
|
||||
//
|
||||
// MP3 audio. Supports all standard MP3 bitrates (which range from 32-320
|
||||
// kbps). When using this encoding, `sample_rate_hertz` has to match the
|
||||
// sample rate of the file being used.
|
||||
//
|
||||
//
|
||||
string audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Required. Source language code (BCP-47) of the input audio.
|
||||
string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Required. Target language code (BCP-47) of the output.
|
||||
string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Optional. A list of up to 3 additional language codes (BCP-47), listing possible
|
||||
// alternative languages of the supplied audio. If alternative source
|
||||
// languages are listed, speech translation result will translate in the most
|
||||
// likely language detected including the main source_language_code. The
|
||||
// translated result will include the language code of the language detected
|
||||
// in the audio.
|
||||
// Note:
|
||||
// 1. If the provided alternative_source_language_code is not supported
|
||||
// by current API version, we will skip that language code.
|
||||
// 2. If the user provides only one eligible entry in alternative_source_language_codes,
|
||||
// the translation will happen between source_language_code and
|
||||
// alternative_source_language_codes. The target_language_code will be
|
||||
// ignored. It will be useful in conversation mode.
|
||||
repeated string alternative_source_language_codes = 6 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Sample rate in Hertz of the audio data. Valid values are:
|
||||
// 8000-48000. 16000 is optimal. For best results, set the sampling rate of
|
||||
// the audio source to 16000 Hz. If that's not possible, use the native sample
|
||||
// rate of the audio source (instead of re-sampling).
|
||||
//
|
||||
int32 sample_rate_hertz = 4 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional.
|
||||
string model = 5 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// Config used for streaming translation.
|
||||
message StreamingTranslateSpeechConfig {
|
||||
// Required. The common config for all the following audio contents.
|
||||
TranslateSpeechConfig audio_config = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Optional. If `false` or omitted, the system performs
|
||||
// continuous translation (continuing to wait for and process audio even if
|
||||
// the user pauses speaking) until the client closes the input stream (gRPC
|
||||
// API) or until the maximum time limit has been reached. May return multiple
|
||||
// `StreamingTranslateSpeechResult`s with the `is_final` flag set to `true`.
|
||||
//
|
||||
// If `true`, the speech translator will detect a single spoken utterance.
|
||||
// When it detects that the user has paused or stopped speaking, it will
|
||||
// return an `END_OF_SINGLE_UTTERANCE` event and cease translation.
|
||||
// When the client receives `END_OF_SINGLE_UTTERANCE` event, the client should
|
||||
// stop sending the requests. However, clients should keep receiving remaining
|
||||
// responses until the stream is terminated. To construct the complete
|
||||
// sentence in a streaming way, one should override (if `is_final` of previous
|
||||
// response is false), or append (if `is_final` of previous response is true).
|
||||
bool single_utterance = 2 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Stability control for the media translation text. The value should be
|
||||
// "LOW", "MEDIUM", "HIGH". It applies to text/text_and_audio translation
|
||||
// only.
|
||||
// For audio translation mode, we only support HIGH stability mode,
|
||||
// low/medium stability mode will throw argument error.
|
||||
// Default empty string will be treated as "HIGH" in audio translation mode;
|
||||
// will be treated as "LOW" in other translation modes.
|
||||
// Note that there is a trade-off between stability and speed.
|
||||
// 1. "LOW": In low mode, translation service will start to do translation
|
||||
// right after getting recognition response. The speed will be faster.
|
||||
// 2. "MEDIUM": In medium mode, translation service will
|
||||
// check if the recognition response is stable enough or not, and only
|
||||
// translate recognition response which is not likely to be changed later.
|
||||
// 3. "HIGH": In high mode, translation service will wait for more stable
|
||||
// recognition responses, and then start to do translation. Also, the
|
||||
// following recognition responses cannot modify previous recognition
|
||||
// responses. Thus it may impact quality in some situations. "HIGH" stability
|
||||
// will generate "final" responses more frequently.
|
||||
//
|
||||
string stability = 3 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Translation mode. The value should be "text", "audio", or "text_and_audio".
|
||||
// Default empty string will be treated as "text".
|
||||
// 1. "text": The response will be text translation. Text translation has a
|
||||
// field "is_final". Detailed definition can be found in
|
||||
// `TextTranslationResult`.
|
||||
// 2. "audio": The response will be audio translation. Audio translation does
|
||||
// not have "is_final" field, which means each audio translation response is
|
||||
// stable and will not be changed by later response.
|
||||
// Translation mode "audio" can only be used with "high" stability mode.
|
||||
// 3. "text_and_audio": The response will have a text translation, when
|
||||
// "is_final" is true, we will also output its corresponding audio
|
||||
// translation. When "is_final" is false, audio_translation field will be
|
||||
// empty.
|
||||
string translation_mode = 4 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. If disable_interim_results is true, we will only return "final" responses.
|
||||
// Otherwise, we will return all the responses. Default value will be false.
|
||||
// User can only set disable_interim_results to be true with "high" stability
|
||||
// mode.
|
||||
bool disable_interim_results = 5 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// The top-level message sent by the client for the `StreamingTranslateSpeech`
|
||||
// method. Multiple `StreamingTranslateSpeechRequest` messages are sent. The
|
||||
// first message must contain a `streaming_config` message and must not contain
|
||||
// `audio_content` data. All subsequent messages must contain `audio_content`
|
||||
// data and must not contain a `streaming_config` message.
|
||||
message StreamingTranslateSpeechRequest {
|
||||
// The streaming request, which is either a streaming config or content.
|
||||
oneof streaming_request {
|
||||
// Provides information to the recognizer that specifies how to process the
|
||||
// request. The first `StreamingTranslateSpeechRequest` message must contain
|
||||
// a `streaming_config` message.
|
||||
StreamingTranslateSpeechConfig streaming_config = 1;
|
||||
|
||||
// The audio data to be translated. Sequential chunks of audio data are sent
|
||||
// in sequential `StreamingTranslateSpeechRequest` messages. The first
|
||||
// `StreamingTranslateSpeechRequest` message must not contain
|
||||
// `audio_content` data and all subsequent `StreamingTranslateSpeechRequest`
|
||||
// messages must contain `audio_content` data. The audio bytes must be
|
||||
// encoded as specified in `StreamingTranslateSpeechConfig`. Note: as with
|
||||
// all bytes fields, protocol buffers use a pure binary representation (not
|
||||
// base64).
|
||||
bytes audio_content = 2;
|
||||
}
|
||||
}
|
||||
|
||||
// A streaming speech translation result corresponding to a portion of the audio
|
||||
// that is currently being processed.
|
||||
message StreamingTranslateSpeechResult {
|
||||
// Text translation result.
|
||||
message TextTranslationResult {
|
||||
// Output only. The translated sentence.
|
||||
string translation = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// Output only. If `false`, this `StreamingTranslateSpeechResult` represents
|
||||
// an interim result that may change. If `true`, this is the final time the
|
||||
// translation service will return this particular
|
||||
// `StreamingTranslateSpeechResult`, the streaming translator will not
|
||||
// return any further hypotheses for this portion of the transcript and
|
||||
// corresponding audio.
|
||||
bool is_final = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// Audio translation result.
|
||||
message AudioTranslationResult {
|
||||
// Output only. The translated audio.
|
||||
bytes audio_translation = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// Text translation result.
|
||||
TextTranslationResult text_translation_result = 1;
|
||||
|
||||
// Audio translation result.
|
||||
AudioTranslationResult audio_translation_result = 2;
|
||||
|
||||
// Output only. The debug-only recognition result in the original language. This field is debug
|
||||
// only and will be set to empty string if not available.
|
||||
// This is an implementation detail and will not be backward compatible.
|
||||
string recognition_result = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// Output only. The language code of the language detected in the audio.
|
||||
string detected_source_language_code = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
||||
// A streaming speech translation response corresponding to a portion of
|
||||
// the audio currently processed.
|
||||
message StreamingTranslateSpeechResponse {
|
||||
// Indicates the type of speech event.
|
||||
enum SpeechEventType {
|
||||
// No speech event specified.
|
||||
SPEECH_EVENT_TYPE_UNSPECIFIED = 0;
|
||||
|
||||
// This event indicates that the server has detected the end of the user's
|
||||
// speech utterance and expects no additional speech. Therefore, the server
|
||||
// will not process additional audio (although it may subsequently return
|
||||
// additional results). When the client receives `END_OF_SINGLE_UTTERANCE`
|
||||
// event, the client should stop sending the requests. However, clients
|
||||
// should keep receiving remaining responses until the stream is terminated.
|
||||
// To construct the complete sentence in a streaming way, one should
|
||||
// override (if `is_final` of previous response is `false`), or append (if
|
||||
// `is_final` of previous response is `true`). This event is only sent if
|
||||
// `single_utterance` was set to `true`, and is not used otherwise.
|
||||
END_OF_SINGLE_UTTERANCE = 1;
|
||||
}
|
||||
|
||||
// Output only. If set, returns a [google.rpc.Status][google.rpc.Status] message that
|
||||
// specifies the error for the operation.
|
||||
google.rpc.Status error = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// Output only. The translation result that is currently being processed (For text
|
||||
// translation, `is_final` could be `true` or `false`.
|
||||
// For audio translation, we do not have is_final field, which means each
|
||||
// audio response is stable and will not get changed later. For
|
||||
// text_and_audio, we still have `is_final` field in text translation, but we
|
||||
// only output corresponding audio when `is_final` is true.).
|
||||
StreamingTranslateSpeechResult result = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
|
||||
// Output only. Indicates the type of speech event.
|
||||
SpeechEventType speech_event_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
{
|
||||
"methodConfig": [{
|
||||
"name": [{ "service": "google.cloud.mediatranslation.v1alpha1.SpeechTranslationService" }],
|
||||
"timeout": "400s",
|
||||
"retryPolicy": {
|
||||
"maxAttempts": 5,
|
||||
"initialBackoff": "1s",
|
||||
"maxBackoff": "60s",
|
||||
"backoffMultiplier": 1.3,
|
||||
"retryableStatusCodes": ["UNAVAILABLE", "UNKNOWN", "DEADLINE_EXCEEDED"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": [
|
||||
{ "service": "google.cloud.mediatranslation.v1alpha1.SpeechTranslationService", "method": "StreamingTranslateSpeech" }
|
||||
],
|
||||
"timeout": "400s"
|
||||
}]
|
||||
}
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
type: google.api.Service
|
||||
config_version: 3
|
||||
name: mediatranslation.googleapis.com
|
||||
title: Media Translation API
|
||||
|
||||
apis:
|
||||
- name: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService
|
||||
|
||||
backend:
|
||||
rules:
|
||||
- selector: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService.StreamingTranslateSpeech
|
||||
deadline: 355.0
|
||||
|
||||
authentication:
|
||||
rules:
|
||||
- selector: google.cloud.mediatranslation.v1alpha1.SpeechTranslationService.StreamingTranslateSpeech
|
||||
oauth:
|
||||
canonical_scopes: |-
|
||||
https://www.googleapis.com/auth/cloud-platform
|
||||
|
|
@ -1,4 +1,13 @@
|
|||
# This file was automatically generated by BuildFileGenerator
|
||||
# https://github.com/googleapis/gapic-generator/tree/master/rules_gapic/bazel
|
||||
|
||||
# Most of the manual changes to this file will be overwritten.
|
||||
# It's **only** allowed to change the following rule attribute values:
|
||||
# - names of *_gapic_assembly_* rules
|
||||
# - certain parameters of *_gapic_library rules, including but not limited to:
|
||||
# * extra_protoc_parameters
|
||||
# * extra_protoc_file_parameters
|
||||
# The complete list of preserved parameters can be found in the source code.
|
||||
|
||||
# This is an API workspace, having public visibility by default makes perfect sense.
|
||||
package(default_visibility = ["//visibility:public"])
|
||||
|
|
@ -291,6 +300,7 @@ ruby_gapic_library(
|
|||
name = "mediatranslation_ruby_gapic",
|
||||
src = ":mediatranslation_proto_with_info",
|
||||
gapic_yaml = "mediatranslation_gapic.yaml",
|
||||
grpc_service_config = "mediatranslation_grpc_service_config.json",
|
||||
package = "google.cloud.mediatranslation.v1beta1",
|
||||
service_yaml = "mediatranslation_v1beta1.yaml",
|
||||
deps = [
|
||||
|
|
@ -335,6 +345,7 @@ csharp_gapic_library(
|
|||
name = "mediatranslation_csharp_gapic",
|
||||
src = ":mediatranslation_proto_with_info",
|
||||
gapic_yaml = "mediatranslation_gapic.yaml",
|
||||
grpc_service_config = "mediatranslation_grpc_service_config.json",
|
||||
package = "google.cloud.mediatranslation.v1beta1",
|
||||
service_yaml = "mediatranslation_v1beta1.yaml",
|
||||
deps = [
|
||||
|
|
|
|||
Loading…
Reference in New Issue