diff --git a/PACKAGES.md b/PACKAGES.md index 2afbc296..5a23cfe9 100644 --- a/PACKAGES.md +++ b/PACKAGES.md @@ -35,6 +35,7 @@ found. Currently, package delivery is only being done for Python. - [Documentation](http://pythonhosted.org/gax-google-pubsub-v1/) ### [Cloud Speech](https://cloud.google.com/speech/) +- *NOTE: v1 is the wrong version, and these will soon be replaced by v1beta1* - [GAX](https://pypi.python.org/pypi/gax-google-cloud-speech-v1) - [gRPC](https://pypi.python.org/pypi/grpc-google-cloud-speech-v1) - Documentation: TBD diff --git a/gapic/api/artman_speech.yaml b/gapic/api/artman_speech.yaml index 1104f6fb..d93e5f5c 100644 --- a/gapic/api/artman_speech.yaml +++ b/gapic/api/artman_speech.yaml @@ -1,14 +1,14 @@ common: - api_name: google-cloud-speech-v1 + api_name: google-cloud-speech-v1beta1 import_proto_path: - ${REPOROOT}/googleapis src_proto_path: - - ${REPOROOT}/googleapis/google/cloud/speech/v1 + - ${REPOROOT}/googleapis/google/cloud/speech/v1beta1 service_yaml: - ${REPOROOT}/googleapis/google/cloud/speech/cloud_speech.yaml output_dir: ${REPOROOT}/artman/output gapic_api_yaml: - - ${REPOROOT}/googleapis/google/cloud/speech/v1/cloud_speech_gapic.yaml + - ${REPOROOT}/googleapis/google/cloud/speech/v1beta1/cloud_speech_gapic.yaml java: final_repo_dir: ${REPOROOT}/gcloud-java/gcloud-java-speech python: diff --git a/google/cloud/speech/cloud_speech.yaml b/google/cloud/speech/cloud_speech.yaml index e347309d..590a6384 100644 --- a/google/cloud/speech/cloud_speech.yaml +++ b/google/cloud/speech/cloud_speech.yaml @@ -1,7 +1,7 @@ # Google Cloud Speech API service configuration type: google.api.Service -config_version: 1 +config_version: 3 name: speech.googleapis.com title: Google Cloud Speech API @@ -11,17 +11,19 @@ documentation: Google Cloud Speech API. apis: -- name: google.cloud.speech.v1.Speech +- name: google.cloud.speech.v1beta1.Speech +# TODO get LRO API call support added for Speech +#- name: google.longrunning.Operations # Pass origin and project RpcServerExtensions to the backend. context: rules: - selector: '*' requested: - - google.rpc.context.OriginContext - google.rpc.context.ProjectContext + - google.rpc.context.OriginContext -# Auth section +# Auth section. authentication: rules: - selector: '*' diff --git a/google/cloud/speech/v1/cloud_speech.proto b/google/cloud/speech/v1/cloud_speech.proto deleted file mode 100644 index 07818923..00000000 --- a/google/cloud/speech/v1/cloud_speech.proto +++ /dev/null @@ -1,293 +0,0 @@ -// Copyright 2016 Google Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.speech.v1; - -import "google/api/annotations.proto"; -import "google/rpc/status.proto"; - -option java_multiple_files = true; -option java_outer_classname = "SpeechProto"; -option java_package = "com.google.cloud.speech.v1"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1"; - -// Service that implements Google Cloud Speech API. -service Speech { - // Perform bidirectional streaming speech-recognition: receive results while - // sending audio. - rpc Recognize(stream RecognizeRequest) returns (stream RecognizeResponse); - - // Perform non-streaming speech-recognition: receive results after all audio - // has been sent and processed. - rpc NonStreamingRecognize(RecognizeRequest) returns (NonStreamingRecognizeResponse) { - option (google.api.http) = { post: "/v1/speech:recognize" body: "*" }; - } -} - -// `RecognizeRequest` is the only message type sent by the client. -// -// When using the REST API or the gRPC `NonStreamingRecognize` API, only one -// `RecognizeRequest` message is sent, and it must contain both an -// `initial_request` and an 'audio_request`. -// -// When using the gRPC Streaming `Recognize` API, one or more `RecognizeRequest` -// messages are sent. The first message must contain an `initial_request` and -// may contain an 'audio_request`. Any subsequent messages must not contain an -// `initial_request` and must contain an 'audio_request`. -message RecognizeRequest { - // The `initial_request` message provides information to the recognizer - // that specifies how to process the request. - // - // The first `RecognizeRequest` message must contain an `initial_request`. - // Any subsequent `RecognizeRequest` messages must not contain an - // `initial_request`. - InitialRecognizeRequest initial_request = 1; - - // The audio data to be recognized. For REST or `NonStreamingRecognize`, all - // audio data must be contained in the first (and only) `RecognizeRequest` - // message. For gRPC streaming `Recognize`, sequential chunks of audio data - // are sent in sequential `RecognizeRequest` messages. - AudioRequest audio_request = 2; -} - -// The `InitialRecognizeRequest` message provides information to the recognizer -// that specifies how to process the request. -message InitialRecognizeRequest { - // Audio encoding of the data sent in the audio message. All encodings support - // only 1 channel (mono) audio. Only `FLAC` includes a header that describes - // the bytes of audio that follow the header. The other encodings are raw - // audio bytes with no header. - // - // For best results, the audio source should be captured and transmitted using - // a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be - // reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture - // or transmit the audio, particularly if background noise is present. - enum AudioEncoding { - // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. - ENCODING_UNSPECIFIED = 0; - - // Uncompressed 16-bit signed little-endian samples. - LINEAR16 = 1; - - // This is the recommended encoding because it uses lossless compression; - // therefore recognition accuracy is not compromised by a lossy codec. - // - // The stream FLAC (Free Lossless Audio Codec) encoding is specified at: - // http://flac.sourceforge.net/documentation.html. - // Only 16-bit samples are supported. - // Not all fields in STREAMINFO are supported. - FLAC = 2; - - // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. - MULAW = 3; - - // Adaptive Multi-Rate Narrowband codec. `sample_rate` must be 8000 Hz. - AMR = 4; - - // Adaptive Multi-Rate Wideband codec. `sample_rate` must be 16000 Hz. - AMR_WB = 5; - } - - // [Required] Encoding of audio data sent in all `AudioRequest` messages. - AudioEncoding encoding = 1; - - // [Required] Sample rate in Hertz of the audio data sent in all - // AudioRequest messages. Valid values are: 8000-48000. - // 16000 is optimal. For best results, set the sampling rate of the audio - // source to 16000 Hz. If that's not possible, use the native sample rate of - // the audio source (instead of re-sampling). - int32 sample_rate = 2; - - // [Optional] The language of the supplied audio as a BCP-47 language tag. - // Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt - // If omitted, defaults to "en-US". See - // [Language Support](/speech/docs/best-practices#language_support) for - // a list of the currently supported language codes. - string language_code = 3; - - // [Optional] Maximum number of recognition hypotheses to be returned. - // Specifically, the maximum number of `SpeechRecognitionAlternative` messages - // within each `SpeechRecognitionResult`. - // The server may return fewer than `max_alternatives`. - // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of - // `1`. If omitted, defaults to `1`. - int32 max_alternatives = 4; - - // [Optional] If set to `true`, the server will attempt to filter out - // profanities, replacing all but the initial character in each filtered word - // with asterisks, e.g. "f***". If set to `false` or omitted, profanities - // won't be filtered out. - bool profanity_filter = 5; - - // [Optional] If `false` or omitted, the recognizer will detect a single - // spoken utterance, and it will cease recognition when the user stops - // speaking. If `enable_endpointer_events` is `true`, it will return - // `END_OF_UTTERANCE` when it detects that the user has stopped speaking. - // In all cases, it will return no more than one `SpeechRecognitionResult`, - // and set the `is_final` flag to `true`. - // - // If `true`, the recognizer will continue recognition (even if the user - // pauses speaking) until the client closes the output stream (gRPC API) or - // completes the POST data (REST API) or when the maximum time limit has been - // reached. Multiple `SpeechRecognitionResult`s with the `is_final` flag set - // to `true` may be returned to indicate that the recognizer will not return - // any further hypotheses for that portion of the transcript. - bool continuous = 6; - - // [Optional] If this parameter is `true`, interim results may be returned as - // they become available. - // If `false` or omitted, only `is_final=true` result(s) are returned. - bool interim_results = 7; - - // [Optional] If this parameter is `true`, `EndpointerEvents` may be returned - // as they become available. - // If `false` or omitted, no `EndpointerEvents` are returned. - bool enable_endpointer_events = 8; - - // [Optional] URI that points to a file where the recognition result should - // be stored in JSON format. If omitted or empty string, the recognition - // result is returned in the response. Should be specified only for - // `NonStreamingRecognize`. If specified in a `Recognize` request, - // `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. - // If specified in a `NonStreamingRecognize` request, - // `NonStreamingRecognize` returns immediately, and the output file - // is created asynchronously once the audio processing completes. - // Currently, only Google Cloud Storage URIs are supported, which must be - // specified in the following format: `gs://bucket_name/object_name` - // (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For - // more information, see [Request URIs](/storage/docs/reference-uris). - string output_uri = 9; - - // [Optional] A means to provide context to assist the speech recognition. - SpeechContext speech_context = 10; -} - -// Provides "hints" to the speech recognizer to favor specific words and phrases -// in the results. -message SpeechContext { - // [Optional] A list of up to 50 phrases of up to 100 characters each to - // provide words and phrases "hints" to the speech recognition so that it is - // more likely to recognize them. - repeated string phrases = 1; -} - -// Contains audio data in the encoding specified in the -// `InitialRecognizeRequest`. Either `content` or `uri` must be supplied. -// Supplying both or neither returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. -message AudioRequest { - // The audio data bytes encoded as specified in - // `InitialRecognizeRequest`. Note: as with all bytes fields, protobuffers - // use a pure binary representation, whereas JSON representations use base64. - bytes content = 1; - - // URI that points to a file that contains audio data bytes as specified in - // `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are - // supported, which must be specified in the following format: - // `gs://bucket_name/object_name` (other URI formats return - // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see - // [Request URIs](/storage/docs/reference-uris). - string uri = 2; -} - -// `NonStreamingRecognizeResponse` is the only message returned to the client by -// `NonStreamingRecognize`. It contains the result as zero or more sequential -// `RecognizeResponse` messages. -// -// Note that streaming `Recognize` will also return multiple `RecognizeResponse` -// messages, but each message is individually streamed. -message NonStreamingRecognizeResponse { - // [Output-only] Sequential list of messages returned by the recognizer. - repeated RecognizeResponse responses = 1; -} - -// `RecognizeResponse` is the only message type returned to the client. -message RecognizeResponse { - // Indicates the type of endpointer event. - enum EndpointerEvent { - // No endpointer event specified. - ENDPOINTER_EVENT_UNSPECIFIED = 0; - - // Speech has been detected in the audio stream. - START_OF_SPEECH = 1; - - // Speech has ceased to be detected in the audio stream. - END_OF_SPEECH = 2; - - // The end of the audio stream has been reached. and it is being processed. - END_OF_AUDIO = 3; - - // This event is only sent when continuous is `false`. It indicates that the - // server has detected the end of the user's speech utterance and expects no - // additional speech. Therefore, the server will not process additional - // audio. The client should stop sending additional audio data. - END_OF_UTTERANCE = 4; - } - - // [Output-only] If set, returns a [google.rpc.Status][] message that - // specifies the error for the operation. - google.rpc.Status error = 1; - - // [Output-only] For `continuous=false`, this repeated list contains zero or - // one result that corresponds to all of the audio processed so far. For - // `continuous=true`, this repeated list contains zero or more results that - // correspond to consecutive portions of the audio being processed. - // In both cases, contains zero or one `is_final=true` result (the newly - // settled portion), followed by zero or more `is_final=false` results. - repeated SpeechRecognitionResult results = 2; - - // [Output-only] Indicates the lowest index in the `results` array that has - // changed. The repeated `SpeechRecognitionResult` results overwrite past - // results at this index and higher. - int32 result_index = 3; - - // [Output-only] Indicates the type of endpointer event. - EndpointerEvent endpoint = 4; -} - -// A speech recognition result corresponding to a portion of the audio. -message SpeechRecognitionResult { - // [Output-only] May contain one or more recognition hypotheses (up to the - // maximum specified in `max_alternatives`). - repeated SpeechRecognitionAlternative alternatives = 1; - - // [Output-only] Set `true` if this is the final time the speech service will - // return this particular `SpeechRecognitionResult`. If `false`, this - // represents an interim result that may change. - bool is_final = 2; - - // [Output-only] An estimate of the probability that the recognizer will not - // change its guess about this interim result. Values range from 0.0 - // (completely unstable) to 1.0 (completely stable). Note that this is not the - // same as `confidence`, which estimates the probability that a recognition - // result is correct. - // This field is only provided for interim results (`is_final=false`). - // The default of 0.0 is a sentinel value indicating stability was not set. - float stability = 3; -} - -// Alternative hypotheses (a.k.a. n-best list). -message SpeechRecognitionAlternative { - // [Output-only] Transcript text representing the words that the user spoke. - string transcript = 1; - - // [Output-only] The confidence estimate between 0.0 and 1.0. A higher number - // means the system is more confident that the recognition is correct. - // This field is typically provided only for the top hypothesis. and only for - // `is_final=true` results. - // The default of 0.0 is a sentinel value indicating confidence was not set. - float confidence = 2; -} diff --git a/google/cloud/speech/v1/cloud_speech_gapic.yaml b/google/cloud/speech/v1beta1/cloud_speech_gapic.yaml similarity index 55% rename from google/cloud/speech/v1/cloud_speech_gapic.yaml rename to google/cloud/speech/v1beta1/cloud_speech_gapic.yaml index dd172d76..1a213bdc 100644 --- a/google/cloud/speech/v1/cloud_speech_gapic.yaml +++ b/google/cloud/speech/v1beta1/cloud_speech_gapic.yaml @@ -1,21 +1,21 @@ type: com.google.api.codegen.ConfigProto language_settings: java: - package_name: com.google.cloud.speech.spi.v1 + package_name: com.google.cloud.speech.spi.v1beta1 python: - package_name: google.cloud.speech.v1 + package_name: google.cloud.speech.v1beta1 go: - package_name: cloud.google.com/go/speech/apiv1 + package_name: cloud.google.com/go/speech/apiv1beta1 csharp: - package_name: Google.Cloud.Speech.V1 + package_name: Google.Cloud.Speech.V1beta1 ruby: - package_name: Google::Cloud::Speech::V1 + package_name: Google::Cloud::Speech::V1beta1 php: - package_name: Google\Cloud\Speech\V1 + package_name: Google\Cloud\Speech\V1beta1 nodejs: package_name: "@google-cloud/speech" interfaces: -- name: google.cloud.speech.v1.Speech +- name: google.cloud.speech.v1beta1.Speech collections: [] retry_codes_def: - name: idempotent @@ -34,20 +34,33 @@ interfaces: max_rpc_timeout_millis: 60000 total_timeout_millis: 600000 methods: - - name: Recognize - retry_codes_name: non_idempotent - retry_params_name: default - timeout_millis: 60000 - - name: NonStreamingRecognize + - name: SyncRecognize flattening: groups: - parameters: - - initial_request - - audio_request + - config + - audio required_fields: - - initial_request - - audio_request + - config + - audio request_object_method: true retry_codes_name: idempotent retry_params_name: default timeout_millis: 60000 + - name: AsyncRecognize + flattening: + groups: + - parameters: + - config + - audio + required_fields: + - config + - audio + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 + - name: StreamingRecognize + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 60000