diff --git a/google/cloud/speech/cloud_speech_v1_1beta1.yaml b/google/cloud/speech/cloud_speech_v1_1beta1.yaml
deleted file mode 100644
index 55ce330f..00000000
--- a/google/cloud/speech/cloud_speech_v1_1beta1.yaml
+++ /dev/null
@@ -1,20 +0,0 @@
-# Google Cloud Speech API service configuration
-
-type: google.api.Service
-config_version: 3
-name: speech.googleapis.com
-
-title: Google Cloud Speech API
-
-documentation:
-  summary:
-    Google Cloud Speech API.
-
-apis:
-- name: google.cloud.speech.v1p1beta1.Speech
-
-authentication:
-  rules:
-  - selector: '*'
-    oauth:
-      canonical_scopes: https://www.googleapis.com/auth/cloud-platform
diff --git a/google/cloud/speech/speech_v1_1beta1.yaml b/google/cloud/speech/speech_v1_1beta1.yaml
deleted file mode 100644
index 54a428a4..00000000
--- a/google/cloud/speech/speech_v1_1beta1.yaml
+++ /dev/null
@@ -1,25 +0,0 @@
-# Google Cloud Speech API service configuration
-
-type: google.api.Service
-config_version: 3
-name: speech.googleapis.com
-
-title: Google Cloud Speech API
-
-documentation:
-  summary:
-    Google Cloud Speech API.
-
-apis:
-- name: google.cloud.speech.v1p1beta1.Speech
-
-authentication:
-  rules:
-  - selector: '*'
-    oauth:
-      canonical_scopes: https://www.googleapis.com/auth/cloud-platform
-
-http:
-  rules:
-  - selector: google.longrunning.Operations.GetOperation
-    get: '/v1p1beta1/operations/{name=*}'
diff --git a/google/cloud/speech/v1_1beta1/cloud_speech.proto b/google/cloud/speech/v1_1beta1/cloud_speech.proto
deleted file mode 100644
index 269c6006..00000000
--- a/google/cloud/speech/v1_1beta1/cloud_speech.proto
+++ /dev/null
@@ -1,607 +0,0 @@
-// Copyright 2017 Google Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.speech.v1p1beta1;
-
-import "google/api/annotations.proto";
-import "google/longrunning/operations.proto";
-import "google/protobuf/duration.proto";
-import "google/protobuf/timestamp.proto";
-import "google/rpc/status.proto";
-
-option cc_enable_arenas = true;
-option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1p1beta1;speech";
-option java_multiple_files = true;
-option java_outer_classname = "SpeechProto";
-option java_package = "com.google.cloud.speech.v1p1beta1";
-
-
-// Service that implements Google Cloud Speech API.
-service Speech {
-  // Performs synchronous speech recognition: receive results after all audio
-  // has been sent and processed.
-  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
-    option (google.api.http) = { post: "/v1p1beta1/speech:recognize" body: "*" };
-  }
-
-  // Performs asynchronous speech recognition: receive results via the
-  // google.longrunning.Operations interface. Returns either an
-  // `Operation.error` or an `Operation.response` which contains
-  // a `LongRunningRecognizeResponse` message.
-  rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
-    option (google.api.http) = { post: "/v1p1beta1/speech:longrunningrecognize" body: "*" };
-  }
-
-  // Performs bidirectional streaming speech recognition: receive results while
-  // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse);
-}
-
-// The top-level message sent by the client for the `Recognize` method.
-message RecognizeRequest {
-  // *Required* Provides information to the recognizer that specifies how to
-  // process the request.
-  RecognitionConfig config = 1;
-
-  // *Required* The audio data to be recognized.
-  RecognitionAudio audio = 2;
-}
-
-// The top-level message sent by the client for the `LongRunningRecognize`
-// method.
-message LongRunningRecognizeRequest {
-  // *Required* Provides information to the recognizer that specifies how to
-  // process the request.
-  RecognitionConfig config = 1;
-
-  // *Required* The audio data to be recognized.
-  RecognitionAudio audio = 2;
-}
-
-// The top-level message sent by the client for the `StreamingRecognize` method.
-// Multiple `StreamingRecognizeRequest` messages are sent. The first message
-// must contain a `streaming_config` message and must not contain `audio` data.
-// All subsequent messages must contain `audio` data and must not contain a
-// `streaming_config` message.
-message StreamingRecognizeRequest {
-  // The streaming request, which is either a streaming config or audio content.
-  oneof streaming_request {
-    // Provides information to the recognizer that specifies how to process the
-    // request. The first `StreamingRecognizeRequest` message must contain a
-    // `streaming_config` message.
-    StreamingRecognitionConfig streaming_config = 1;
-
-    // The audio data to be recognized. Sequential chunks of audio data are sent
-    // in sequential `StreamingRecognizeRequest` messages. The first
-    // `StreamingRecognizeRequest` message must not contain `audio_content` data
-    // and all subsequent `StreamingRecognizeRequest` messages must contain
-    // `audio_content` data. The audio bytes must be encoded as specified in
-    // `RecognitionConfig`. Note: as with all bytes fields, protocol buffers use a
-    // pure binary representation (not base64). See
-    // [audio limits](https://cloud.google.com/speech/limits#content).
-    bytes audio_content = 2;
-  }
-}
-
-// Provides information to the recognizer that specifies how to process the
-// request.
-message StreamingRecognitionConfig {
-  // *Required* Provides information to the recognizer that specifies how to
-  // process the request.
-  RecognitionConfig config = 1;
-
-  // *Optional* If `false` or omitted, the recognizer will perform continuous
-  // recognition (continuing to wait for and process audio even if the user
-  // pauses speaking) until the client closes the input stream (gRPC API) or
-  // until the maximum time limit has been reached. May return multiple
-  // `StreamingRecognitionResult`s with the `is_final` flag set to `true`.
-  //
-  // If `true`, the recognizer will detect a single spoken utterance. When it
-  // detects that the user has paused or stopped speaking, it will return an
-  // `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
-  // more than one `StreamingRecognitionResult` with the `is_final` flag set to
-  // `true`.
-  bool single_utterance = 2;
-
-  // *Optional* If `true`, interim results (tentative hypotheses) may be
-  // returned as they become available (these interim results are indicated with
-  // the `is_final=false` flag).
-  // If `false` or omitted, only `is_final=true` result(s) are returned.
-  bool interim_results = 3;
-}
-
-// Provides information to the recognizer that specifies how to process the
-// request.
-message RecognitionConfig {
-  // Audio encoding of the data sent in the audio message. All encodings support
-  // only 1 channel (mono) audio. Only `FLAC` includes a header that describes
-  // the bytes of audio that follow the header. The other encodings are raw
-  // audio bytes with no header.
-  //
-  // For best results, the audio source should be captured and transmitted using
-  // a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be
-  // reduced if lossy codecs, which include the other codecs listed in
-  // this section, are used to capture or transmit the audio, particularly if
-  // background noise is present.
-  enum AudioEncoding {
-    // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
-    ENCODING_UNSPECIFIED = 0;
-
-    // Uncompressed 16-bit signed little-endian samples (Linear PCM).
-    LINEAR16 = 1;
-
-    // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
-    // Codec) is the recommended encoding because it is
-    // lossless--therefore recognition is not compromised--and
-    // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
-    // encoding supports 16-bit and 24-bit samples; however, not all fields in
-    // `STREAMINFO` are supported.
-    FLAC = 2;
-
-    // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
-    MULAW = 3;
-
-    // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
-    AMR = 4;
-
-    // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
-    AMR_WB = 5;
-
-    // Opus encoded audio frames in Ogg container
-    // ([OggOpus](https://wiki.xiph.org/OggOpus)).
-    // `sample_rate_hertz` must be 16000.
-    OGG_OPUS = 6;
-
-    // Although the use of lossy encodings is not recommended, if a very low
-    // bitrate encoding is required, `OGG_OPUS` is highly preferred over
-    // Speex encoding. The [Speex](https://speex.org/) encoding supported by
-    // Cloud Speech API has a header byte in each block, as in MIME type
-    // `audio/x-speex-with-header-byte`.
-    // It is a variant of the RTP Speex encoding defined in
-    // [RFC 5574](https://tools.ietf.org/html/rfc5574).
-    // The stream is a sequence of blocks, one block per RTP packet. Each block
-    // starts with a byte containing the length of the block, in bytes, followed
-    // by one or more frames of Speex data, padded to an integral number of
-    // bytes (octets) as specified in RFC 5574. In other words, each RTP header
-    // is replaced with a single byte containing the block length. Only Speex
-    // wideband is supported. `sample_rate_hertz` must be 16000.
-    SPEEX_WITH_HEADER_BYTE = 7;
-  }
-
-  // *Required* Encoding of audio data sent in all `RecognitionAudio` messages.
-  AudioEncoding encoding = 1;
-
-  // *Required* Sample rate in Hertz of the audio data sent in all
-  // `RecognitionAudio` messages. Valid values are: 8000-48000.
-  // 16000 is optimal. For best results, set the sampling rate of the audio
-  // source to 16000 Hz. If that's not possible, use the native sample rate of
-  // the audio source (instead of re-sampling).
-  int32 sample_rate_hertz = 2;
-
-  // *Required* The language of the supplied audio as a
-  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
-  // Example: "en-US".
-  // See [Language Support](https://cloud.google.com/speech/docs/languages)
-  // for a list of the currently supported language codes.
-  string language_code = 3;
-
-  // *Optional* Maximum number of recognition hypotheses to be returned.
-  // Specifically, the maximum number of `SpeechRecognitionAlternative` messages
-  // within each `SpeechRecognitionResult`.
-  // The server may return fewer than `max_alternatives`.
-  // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
-  // one. If omitted, will return a maximum of one.
-  int32 max_alternatives = 4;
-
-  // *Optional* If set to `true`, the server will attempt to filter out
-  // profanities, replacing all but the initial character in each filtered word
-  // with asterisks, e.g. "f***". If set to `false` or omitted, profanities
-  // won't be filtered out.
-  bool profanity_filter = 5;
-
-  // *Optional* A means to provide context to assist the speech recognition.
-  repeated SpeechContext speech_contexts = 6;
-
-  // *Optional* If `true`, the top result includes a list of words and
-  // the start and end time offsets (timestamps) for those words. If
-  // `false`, no word-level time offset information is returned. The default is
-  // `false`.
-  bool enable_word_time_offsets = 8;
-
-  // *Optional* If 'true', adds punctuation to recognition result hypotheses.
-  // This feature is only available in select languages. Setting this for
-  // requests in other languages has no effect at all.
-  // The default 'false' value does not add punctuation to result hypotheses.
-  // NOTE: "This is currently offered as an experimental service, complimentary
-  // to all users. In the future this may be exclusively available as a
-  // premium feature."
-  bool enable_automatic_punctuation = 11;
-
-  // *Optional* Metadata regarding this request.
-  RecognitionMetadata metadata = 9;
-}
-
-// Description of audio data to be recognized.
-message RecognitionMetadata {
-  // Use case categories that the audio recognition request can be described
-  // by.
-  enum InteractionType {
-    // Use case is either unknown or is something other than one of the other
-    // values below.
-    INTERACTION_TYPE_UNSPECIFIED = 0;
-
-    // Multiple people in a conversation or discussion. For example in a
-    // meeting with two or more people actively participating. Typically
-    // all the primary people speaking would be in the same room (if not,
-    // see PHONE_CALL).
-    DISCUSSION = 1;
-
-    // One or more persons lecturing or presenting to others, mostly
-    // uninterrupted.
-    PRESENTATION = 2;
-
-    // A phone call or video conference in which two or more people, who are
-    // not in the same room, are actively participating.
-    PHONE_CALL = 3;
-
-    // A recorded message intended for another person to listen to.
-    VOICEMAIL = 4;
-
-    // Professionally produced audio (e.g. TV show, podcast).
-    PROFESSIONALLY_PRODUCED = 5;
-
-    // Transcribe spoken questions and queries into text.
-    VOICE_SEARCH = 6;
-
-    // Transcribe voice commands, such as for controlling a device.
-    VOICE_COMMAND = 7;
-
-    // Transcribe speech to text to create a written document, such as a
-    // text message, email or report.
-    DICTATION = 8;
-  }
-
-  // Enumerates the types of capture settings describing an audio file.
-  enum MicrophoneDistance {
-    // Audio type is not known.
-    MICROPHONE_DISTANCE_UNSPECIFIED = 0;
-
-    // The audio was captured from a closely placed microphone, e.g. phone,
-    // dictaphone, or handheld microphone. Generally, the speaker is within
-    // 1 meter of the microphone.
-    NEARFIELD = 1;
-
-    // The speaker is within 3 meters of the microphone.
-    MIDFIELD = 2;
-
-    // The speaker is more than 3 meters away from the microphone.
-    FARFIELD = 3;
-  }
-
-  // The original media the speech was recorded on.
-  enum OriginalMediaType {
-    // Unknown original media type.
-    ORIGINAL_MEDIA_TYPE_UNSPECIFIED = 0;
-
-    // The speech data is an audio recording.
-    AUDIO = 1;
-
-    // The speech data was originally recorded on a video.
-    VIDEO = 2;
-  }
-
-  // How many speakers are expected in the speech to be recognized.
-  enum NumberOfSpeakers {
-    // Unknown number of persons speaking.
-    NUMBER_OF_SPEAKERS_UNSPECIFIED = 0;
-
-    // Only one person is the prominent speaker (ignore background voices).
-    ONE_SPEAKER = 1;
-
-    // Two people are the prominent speakers (transcript should focus
-    // on the two most prominent speakers).
-    TWO_SPEAKERS = 2;
-
-    // Transcribe all voices.
-    MULTIPLE_SPEAKERS = 3;
-  }
-
-  // The type of device the speech was recorded with.
-  enum RecordingDeviceType {
-    // The recording device is unknown.
-    RECORDING_DEVICE_TYPE_UNSPECIFIED = 0;
-
-    // Speech was recorded on a smartphone.
-    SMARTPHONE = 1;
-
-    // Speech was recorded using a personal computer or tablet.
-    PC = 2;
-
-    // Speech was recorded over a phone line.
-    PHONE_LINE = 3;
-
-    // Speech was recorded in a vehicle.
-    VEHICLE = 4;
-
-    // Speech was recorded outdoors.
-    OTHER_OUTDOOR_DEVICE = 5;
-
-    // Speech was recorded indoors.
-    OTHER_INDOOR_DEVICE = 6;
-  }
-
-  // The use case most closely describing the audio content to be recognized.
-  InteractionType interaction_type = 1;
-
-  // The industry vertical to which this speech recognition request most
-  // closely applies. This is most indicative of the topics contained
-  // in the audio. Use the 6-digit NAICS code to identify the industry
-  // vertical - see https://www.naics.com/search/.
-  uint32 industry_naics_code_of_audio = 3;
-
-  // The audio type that most closely describes the audio being recognized.
-  MicrophoneDistance microphone_distance = 4;
-
-  // The original media the speech was recorded on.
-  OriginalMediaType original_media_type = 5;
-
-  // How many people are speaking prominently in the audio and expected to be
-  // recognized.
-  NumberOfSpeakers number_of_speakers = 6;
-
-  // The type of device the speech was recorded with.
-  RecordingDeviceType recording_device_type = 7;
-
-  // The device used to make the recording. Examples: 'Nexus 5X' or
-  // 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
-  // 'Cardioid Microphone'.
-  string recording_device_name = 8;
-
-  // MIME type of the original audio file. For example `audio/m4a`,
-  // `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
-  // A list of possible audio MIME types is maintained at
-  // http://www.iana.org/assignments/media-types/media-types.xhtml#audio
-  string original_mime_type = 9;
-
-  // Obfuscated (privacy-protected) ID of the user, to identify the number of
-  // unique users using the service.
-  int64 obfuscated_id = 10;
-
-  // Description of the content. E.g. "Recordings of federal supreme court
-  // hearings from 2012".
-  string audio_topic = 11;
-}
-
-// Provides "hints" to the speech recognizer to favor specific words and phrases
-// in the results.
-message SpeechContext {
-  // *Optional* A list of strings containing words and phrases "hints" so that
-  // the speech recognition is more likely to recognize them. This can be used
-  // to improve the accuracy for specific words and phrases, for example, if
-  // specific commands are typically spoken by the user. This can also be used
-  // to add additional words to the vocabulary of the recognizer. See
-  // [usage limits](https://cloud.google.com/speech/limits#content).
-  repeated string phrases = 1;
-}
-
-// Contains audio data in the encoding specified in the `RecognitionConfig`.
-// Either `content` or `uri` must be supplied. Supplying both or neither
-// returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-// [audio limits](https://cloud.google.com/speech/limits#content).
-message RecognitionAudio {
-  // The audio source, which is either inline content or a Google Cloud Storage URI.
-  oneof audio_source {
-    // The audio data bytes encoded as specified in
-    // `RecognitionConfig`. Note: as with all bytes fields, protocol buffers use a
-    // pure binary representation, whereas JSON representations use base64.
-    bytes content = 1;
-
-    // URI that points to a file that contains audio data bytes as specified in
-    // `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
-    // supported, which must be specified in the following format:
-    // `gs://bucket_name/object_name` (other URI formats return
-    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
-    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
-    string uri = 2;
-  }
-}
-
-// The only message returned to the client by the `Recognize` method. It
-// contains the result as zero or more sequential `SpeechRecognitionResult`
-// messages.
-message RecognizeResponse {
-  // *Output-only* Sequential list of transcription results corresponding to
-  // sequential portions of audio.
-  repeated SpeechRecognitionResult results = 2;
-}
-
-// The only message returned to the client by the `LongRunningRecognize` method.
-// It contains the result as zero or more sequential `SpeechRecognitionResult`
-// messages. It is included in the `result.response` field of the `Operation`
-// returned by the `GetOperation` call of the `google::longrunning::Operations`
-// service.
-message LongRunningRecognizeResponse {
-  // *Output-only* Sequential list of transcription results corresponding to
-  // sequential portions of audio.
-  repeated SpeechRecognitionResult results = 2;
-}
-
-// Describes the progress of a long-running `LongRunningRecognize` call. It is
-// included in the `metadata` field of the `Operation` returned by the
-// `GetOperation` call of the `google::longrunning::Operations` service.
-message LongRunningRecognizeMetadata {
-  // Approximate percentage of audio processed thus far. Guaranteed to be 100
-  // when the audio is fully processed and the results are available.
-  int32 progress_percent = 1;
-
-  // Time when the request was received.
-  google.protobuf.Timestamp start_time = 2;
-
-  // Time of the most recent processing update.
-  google.protobuf.Timestamp last_update_time = 3;
-}
-
-// `StreamingRecognizeResponse` is the only message returned to the client by
-// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
-// messages are streamed back to the client. If there is no recognizable
-// audio, and `single_utterance` is set to false, then no messages are streamed
-// back to the client.
-//
-// Here's an example of a series of seven `StreamingRecognizeResponse`s that might
-// be returned while processing audio:
-//
-// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
-//
-// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
-//
-// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
-//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
-//
-// 4. results { alternatives { transcript: "to be or not to be"
-//                             confidence: 0.92 }
-//              alternatives { transcript: "to bee or not to bee" }
-//    is_final: true }
-//
-// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
-//
-// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
-//    results { alternatives { transcript: " the question" } stability: 0.01 }
-//
-// 7. results { alternatives { transcript: " that is the question"
-//                             confidence: 0.98 }
-//              alternatives { transcript: " that was the question" }
-//    is_final: true }
-//
-// Notes:
-//
-// - Only two of the above responses, #4 and #7, contain final results; they are
-//   indicated by `is_final: true`. Concatenating these together generates the
-//   full transcript: "to be or not to be that is the question".
-//
-// - The others contain interim `results`. #3 and #6 contain two interim
-//   `results`: the first portion has a high stability and is less likely to
-//   change; the second portion has a low stability and is very likely to
-//   change. A UI designer might choose to show only high stability `results`.
-//
-// - The specific `stability` and `confidence` values shown above are only for
-//   illustrative purposes. Actual values may vary.
-//
-// - In each response, only one of these fields will be set:
-//     `error`,
-//     `speech_event_type`, or
-//     one or more (repeated) `results`.
-message StreamingRecognizeResponse {
-  // Indicates the type of speech event.
-  enum SpeechEventType {
-    // No speech event specified.
-    SPEECH_EVENT_UNSPECIFIED = 0;
-
-    // This event indicates that the server has detected the end of the user's
-    // speech utterance and expects no additional speech. Therefore, the server
-    // will not process additional audio (although it may subsequently return
-    // additional results). The client should stop sending additional audio
-    // data, half-close the gRPC connection, and wait for any additional results
-    // until the server closes the gRPC connection. This event is only sent if
-    // `single_utterance` was set to `true`, and is not used otherwise.
-    END_OF_SINGLE_UTTERANCE = 1;
-  }
-
-  // *Output-only* If set, returns a [google.rpc.Status][google.rpc.Status] message that
-  // specifies the error for the operation.
-  google.rpc.Status error = 1;
-
-  // *Output-only* This repeated list contains zero or more results that
-  // correspond to consecutive portions of the audio currently being processed.
-  // It contains zero or more `is_final=false` results followed by zero or one
-  // `is_final=true` result (the newly settled portion).
-  repeated StreamingRecognitionResult results = 2;
-
-  // *Output-only* Indicates the type of speech event.
-  SpeechEventType speech_event_type = 4;
-}
-
-// A streaming speech recognition result corresponding to a portion of the audio
-// that is currently being processed.
-message StreamingRecognitionResult {
-  // *Output-only* May contain one or more recognition hypotheses (up to the
-  // maximum specified in `max_alternatives`).
-  repeated SpeechRecognitionAlternative alternatives = 1;
-
-  // *Output-only* If `false`, this `StreamingRecognitionResult` represents an
-  // interim result that may change. If `true`, this is the final time the
-  // speech service will return this particular `StreamingRecognitionResult`;
-  // the recognizer will not return any further hypotheses for this portion of
-  // the transcript and corresponding audio.
-  bool is_final = 2;
-
-  // *Output-only* An estimate of the likelihood that the recognizer will not
-  // change its guess about this interim result. Values range from 0.0
-  // (completely unstable) to 1.0 (completely stable).
-  // This field is only provided for interim results (`is_final=false`).
-  // The default of 0.0 is a sentinel value indicating `stability` was not set.
-  float stability = 3;
-}
-
-// A speech recognition result corresponding to a portion of the audio.
-message SpeechRecognitionResult {
-  // *Output-only* May contain one or more recognition hypotheses (up to the
-  // maximum specified in `max_alternatives`).
-  // These alternatives are ordered in terms of accuracy, with the top (first)
-  // alternative being the most probable, as ranked by the recognizer.
-  repeated SpeechRecognitionAlternative alternatives = 1;
-}
-
-// Alternative hypotheses (a.k.a. n-best list).
-message SpeechRecognitionAlternative {
-  // *Output-only* Transcript text representing the words that the user spoke.
-  string transcript = 1;
-
-  // *Output-only* The confidence estimate between 0.0 and 1.0. A higher number
-  // indicates an estimated greater likelihood that the recognized words are
-  // correct. This field is typically provided only for the top hypothesis, and
-  // only for `is_final=true` results. Clients should not rely on the
-  // `confidence` field as it is not guaranteed to be accurate, or even set, in
-  // any of the results.
-  // The default of 0.0 is a sentinel value indicating `confidence` was not set.
-  float confidence = 2;
-
-  // *Output-only* A list of word-specific information for each recognized word.
-  repeated WordInfo words = 3;
-}
-
-// Word-specific information for recognized words. Word information is only
-// included in the response when certain request parameters are set, such
-// as `enable_word_time_offsets`.
-message WordInfo {
-  // *Output-only* Time offset relative to the beginning of the audio,
-  // and corresponding to the start of the spoken word.
-  // This field is only set if `enable_word_time_offsets=true` and only
-  // in the top hypothesis.
-  // This is an experimental feature and the accuracy of the time offset can
-  // vary.
-  google.protobuf.Duration start_time = 1;
-
-  // *Output-only* Time offset relative to the beginning of the audio,
-  // and corresponding to the end of the spoken word.
-  // This field is only set if `enable_word_time_offsets=true` and only
-  // in the top hypothesis.
-  // This is an experimental feature and the accuracy of the time offset can
-  // vary.
-  google.protobuf.Duration end_time = 2;
-
-  // *Output-only* The word corresponding to this set of information.
-  string word = 3;
-}
diff --git a/google/cloud/speech/v1_1beta1/cloud_speech_gapic.yaml b/google/cloud/speech/v1_1beta1/cloud_speech_gapic.yaml
deleted file mode 100644
index bbdca978..00000000
--- a/google/cloud/speech/v1_1beta1/cloud_speech_gapic.yaml
+++ /dev/null
@@ -1,96 +0,0 @@
-
-type: com.google.api.codegen.ConfigProto
-language_settings:
-  java:
-    package_name: com.google.cloud.speech.v1_1beta1
-  python:
-    package_name: google.cloud.gapic.speech.v1_1beta1
-  go:
-    package_name: cloud.google.com/go/speech/apiv1_1beta1
-  csharp:
-    package_name: Google.Cloud.Speech.V1_1Beta1
-  ruby:
-    package_name: Google::Cloud::Speech::V1_1beta1
-  php:
-    package_name: Google\Cloud\Speech\V1_1Beta1
-  nodejs:
-    package_name: speech.v1_1beta1
-    domain_layer_location: google-cloud
-license_header:
-  copyright_file: copyright-google.txt
-  license_file: license-header-apache-2.0.txt
-interfaces:
-- name: google.cloud.speech.v1p1beta1.Speech
-  smoke_test:
-    method: Recognize
-    init_fields:
-    - config.language_code="en-US"
-    - config.sample_rate_hertz=44100
-    - config.encoding=FLAC
-    - audio.uri="gs://gapic-toolkit/hello.flac"
-  collections: []
-  retry_codes_def:
-  - name: idempotent
-    retry_codes:
-    - UNAVAILABLE
-    - DEADLINE_EXCEEDED
-  - name: non_idempotent
-    retry_codes: []
-  retry_params_def:
-  - name: default
-    initial_retry_delay_millis: 100
-    retry_delay_multiplier: 1.3
-    max_retry_delay_millis: 60000
-    initial_rpc_timeout_millis: 190000
-    rpc_timeout_multiplier: 1
-    max_rpc_timeout_millis: 190000
-    total_timeout_millis: 600000
-  methods:
-  - name: Recognize
-    flattening:
-      groups:
-      - parameters:
-        - config
-        - audio
-    required_fields:
-    - config
-    - audio
-    sample_code_init_fields:
-    - config.encoding=FLAC
-    - config.sample_rate_hertz=44100
-    - config.language_code="en-US"
-    - audio.uri=gs://bucket_name/file_name.flac
-    request_object_method: true
-    retry_codes_name: idempotent
-    retry_params_name: default
-    timeout_millis: 190000
-  - name: LongRunningRecognize
-    flattening:
-      groups:
-      - parameters:
-        - config
-        - audio
-    required_fields:
-    - config
-    - audio
-    sample_code_init_fields:
-    - config.encoding=FLAC
-    - config.sample_rate_hertz=44100
-    - config.language_code="en-US"
-    - audio.uri=gs://bucket_name/file_name.flac
-    request_object_method: true
-    retry_codes_name: non_idempotent
-    retry_params_name: default
-    timeout_millis: 60000
-    long_running:
-      return_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse
-      metadata_type: google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata
-      initial_poll_delay_millis: 20000
-      poll_delay_multiplier: 1.5
-      max_poll_delay_millis: 45000
-      total_poll_timeout_millis: 86400000
-  - name: StreamingRecognize
-    request_object_method: false
-    retry_codes_name: idempotent
-    retry_params_name: default
-    timeout_millis: 190000
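
The service configs removed above expose `Recognize` as `POST /v1p1beta1/speech:recognize` on
speech.googleapis.com, authenticated via OAuth with the cloud-platform scope. For reference, a
minimal synchronous call over that REST binding might look like the Python sketch below; the use
of google-auth's AuthorizedSession and the bucket/object URI are illustrative assumptions, not
part of the deleted files.

# Sketch: synchronous Recognize over the REST binding declared in
# speech_v1_1beta1.yaml (POST /v1p1beta1/speech:recognize).
# Assumes Application Default Credentials; the gs:// URI is a placeholder.
import google.auth
from google.auth.transport.requests import AuthorizedSession

credentials, _ = google.auth.default(
    scopes=["https://www.googleapis.com/auth/cloud-platform"])
session = AuthorizedSession(credentials)

body = {
    "config": {
        # JSON field names follow the proto3 JSON mapping (camelCase).
        "encoding": "FLAC",
        "sampleRateHertz": 44100,
        "languageCode": "en-US",
    },
    # Per RecognitionAudio, set exactly one of inline "content"
    # (base64 in JSON) or a Google Cloud Storage "uri".
    "audio": {"uri": "gs://bucket_name/file_name.flac"},
}

response = session.post(
    "https://speech.googleapis.com/v1p1beta1/speech:recognize", json=body)
response.raise_for_status()
for result in response.json().get("results", []):
    # Alternatives are ordered most probable first.
    print(result["alternatives"][0]["transcript"])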
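
The `StreamingRecognizeRequest` contract in the deleted proto (the first message carries only
`streaming_config`; every subsequent message carries only `audio_content`) reduces to a simple
ordering rule. The sketch below expresses that rule with plain dicts standing in for the
generated message classes; the config values and audio chunk are placeholders.

# Sketch of the StreamingRecognizeRequest ordering rule: the first request
# carries streaming_config and no audio; all subsequent requests carry
# audio_content only.
def streaming_requests(config, audio_chunks, interim_results=True):
    yield {"streaming_config": {"config": config,
                                "interim_results": interim_results}}
    for chunk in audio_chunks:
        yield {"audio_content": chunk}

# Example: LINEAR16 chunks would be raw 16-bit PCM bytes from a file or mic.
for message in streaming_requests(
        {"encoding": "LINEAR16", "sample_rate_hertz": 16000,
         "language_code": "en-US"},
        audio_chunks=[b"\x00\x00" * 1600]):
    print(sorted(message))  # ['streaming_config'] first, then ['audio_content']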