Switch back to Cloud Speech v1beta1. (#106)
Although v1 sounds newer, that was a mistake. v1beta1 is the version in active development right now. We are commenting out google.longrunning.Operations for now until we figure out how best to support it in GAPIC codegen. Add note that the Python Speech v1 package will soon be replaced by v1beta packages.
This commit is contained in:
parent
65d3c920d3
commit
88f10253cd
|
|
@ -35,6 +35,7 @@ found. Currently, package delivery is only being done for Python.
|
|||
- [Documentation](http://pythonhosted.org/gax-google-pubsub-v1/)
|
||||
|
||||
### [Cloud Speech](https://cloud.google.com/speech/)
|
||||
- *NOTE: v1 is the wrong version, and these will soon be replaced by v1beta1*
|
||||
- [GAX](https://pypi.python.org/pypi/gax-google-cloud-speech-v1)
|
||||
- [gRPC](https://pypi.python.org/pypi/grpc-google-cloud-speech-v1)
|
||||
- Documentation: TBD
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
common:
|
||||
api_name: google-cloud-speech-v1
|
||||
api_name: google-cloud-speech-v1beta1
|
||||
import_proto_path:
|
||||
- ${REPOROOT}/googleapis
|
||||
src_proto_path:
|
||||
- ${REPOROOT}/googleapis/google/cloud/speech/v1
|
||||
- ${REPOROOT}/googleapis/google/cloud/speech/v1beta1
|
||||
service_yaml:
|
||||
- ${REPOROOT}/googleapis/google/cloud/speech/cloud_speech.yaml
|
||||
output_dir: ${REPOROOT}/artman/output
|
||||
gapic_api_yaml:
|
||||
- ${REPOROOT}/googleapis/google/cloud/speech/v1/cloud_speech_gapic.yaml
|
||||
- ${REPOROOT}/googleapis/google/cloud/speech/v1beta1/cloud_speech_gapic.yaml
|
||||
java:
|
||||
final_repo_dir: ${REPOROOT}/gcloud-java/gcloud-java-speech
|
||||
python:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# Google Cloud Speech API service configuration
|
||||
|
||||
type: google.api.Service
|
||||
config_version: 1
|
||||
config_version: 3
|
||||
name: speech.googleapis.com
|
||||
|
||||
title: Google Cloud Speech API
|
||||
|
|
@ -11,17 +11,19 @@ documentation:
|
|||
Google Cloud Speech API.
|
||||
|
||||
apis:
|
||||
- name: google.cloud.speech.v1.Speech
|
||||
- name: google.cloud.speech.v1beta1.Speech
|
||||
# TODO get LRO API call support added for Speech
|
||||
#- name: google.longrunning.Operations
|
||||
|
||||
# Pass origin and project RpcServerExtensions to the backend.
|
||||
context:
|
||||
rules:
|
||||
- selector: '*'
|
||||
requested:
|
||||
- google.rpc.context.OriginContext
|
||||
- google.rpc.context.ProjectContext
|
||||
- google.rpc.context.OriginContext
|
||||
|
||||
# Auth section
|
||||
# Auth section.
|
||||
authentication:
|
||||
rules:
|
||||
- selector: '*'
|
||||
|
|
|
|||
|
|
@ -1,293 +0,0 @@
|
|||
// Copyright 2016 Google Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.speech.v1;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
import "google/rpc/status.proto";
|
||||
|
||||
option java_multiple_files = true;
|
||||
option java_outer_classname = "SpeechProto";
|
||||
option java_package = "com.google.cloud.speech.v1";
|
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/speech/v1";
|
||||
|
||||
// Service that implements Google Cloud Speech API.
|
||||
service Speech {
|
||||
// Perform bidirectional streaming speech-recognition: receive results while
|
||||
// sending audio.
|
||||
rpc Recognize(stream RecognizeRequest) returns (stream RecognizeResponse);
|
||||
|
||||
// Perform non-streaming speech-recognition: receive results after all audio
|
||||
// has been sent and processed.
|
||||
rpc NonStreamingRecognize(RecognizeRequest) returns (NonStreamingRecognizeResponse) {
|
||||
option (google.api.http) = { post: "/v1/speech:recognize" body: "*" };
|
||||
}
|
||||
}
|
||||
|
||||
// `RecognizeRequest` is the only message type sent by the client.
|
||||
//
|
||||
// When using the REST API or the gRPC `NonStreamingRecognize` API, only one
|
||||
// `RecognizeRequest` message is sent, and it must contain both an
|
||||
// `initial_request` and an 'audio_request`.
|
||||
//
|
||||
// When using the gRPC Streaming `Recognize` API, one or more `RecognizeRequest`
|
||||
// messages are sent. The first message must contain an `initial_request` and
|
||||
// may contain an 'audio_request`. Any subsequent messages must not contain an
|
||||
// `initial_request` and must contain an 'audio_request`.
|
||||
message RecognizeRequest {
|
||||
// The `initial_request` message provides information to the recognizer
|
||||
// that specifies how to process the request.
|
||||
//
|
||||
// The first `RecognizeRequest` message must contain an `initial_request`.
|
||||
// Any subsequent `RecognizeRequest` messages must not contain an
|
||||
// `initial_request`.
|
||||
InitialRecognizeRequest initial_request = 1;
|
||||
|
||||
// The audio data to be recognized. For REST or `NonStreamingRecognize`, all
|
||||
// audio data must be contained in the first (and only) `RecognizeRequest`
|
||||
// message. For gRPC streaming `Recognize`, sequential chunks of audio data
|
||||
// are sent in sequential `RecognizeRequest` messages.
|
||||
AudioRequest audio_request = 2;
|
||||
}
|
||||
|
||||
// The `InitialRecognizeRequest` message provides information to the recognizer
|
||||
// that specifies how to process the request.
|
||||
message InitialRecognizeRequest {
|
||||
// Audio encoding of the data sent in the audio message. All encodings support
|
||||
// only 1 channel (mono) audio. Only `FLAC` includes a header that describes
|
||||
// the bytes of audio that follow the header. The other encodings are raw
|
||||
// audio bytes with no header.
|
||||
//
|
||||
// For best results, the audio source should be captured and transmitted using
|
||||
// a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be
|
||||
// reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture
|
||||
// or transmit the audio, particularly if background noise is present.
|
||||
enum AudioEncoding {
|
||||
// Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
||||
ENCODING_UNSPECIFIED = 0;
|
||||
|
||||
// Uncompressed 16-bit signed little-endian samples.
|
||||
LINEAR16 = 1;
|
||||
|
||||
// This is the recommended encoding because it uses lossless compression;
|
||||
// therefore recognition accuracy is not compromised by a lossy codec.
|
||||
//
|
||||
// The stream FLAC (Free Lossless Audio Codec) encoding is specified at:
|
||||
// http://flac.sourceforge.net/documentation.html.
|
||||
// Only 16-bit samples are supported.
|
||||
// Not all fields in STREAMINFO are supported.
|
||||
FLAC = 2;
|
||||
|
||||
// 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
|
||||
MULAW = 3;
|
||||
|
||||
// Adaptive Multi-Rate Narrowband codec. `sample_rate` must be 8000 Hz.
|
||||
AMR = 4;
|
||||
|
||||
// Adaptive Multi-Rate Wideband codec. `sample_rate` must be 16000 Hz.
|
||||
AMR_WB = 5;
|
||||
}
|
||||
|
||||
// [Required] Encoding of audio data sent in all `AudioRequest` messages.
|
||||
AudioEncoding encoding = 1;
|
||||
|
||||
// [Required] Sample rate in Hertz of the audio data sent in all
|
||||
// AudioRequest messages. Valid values are: 8000-48000.
|
||||
// 16000 is optimal. For best results, set the sampling rate of the audio
|
||||
// source to 16000 Hz. If that's not possible, use the native sample rate of
|
||||
// the audio source (instead of re-sampling).
|
||||
int32 sample_rate = 2;
|
||||
|
||||
// [Optional] The language of the supplied audio as a BCP-47 language tag.
|
||||
// Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt
|
||||
// If omitted, defaults to "en-US". See
|
||||
// [Language Support](/speech/docs/best-practices#language_support) for
|
||||
// a list of the currently supported language codes.
|
||||
string language_code = 3;
|
||||
|
||||
// [Optional] Maximum number of recognition hypotheses to be returned.
|
||||
// Specifically, the maximum number of `SpeechRecognitionAlternative` messages
|
||||
// within each `SpeechRecognitionResult`.
|
||||
// The server may return fewer than `max_alternatives`.
|
||||
// Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
|
||||
// `1`. If omitted, defaults to `1`.
|
||||
int32 max_alternatives = 4;
|
||||
|
||||
// [Optional] If set to `true`, the server will attempt to filter out
|
||||
// profanities, replacing all but the initial character in each filtered word
|
||||
// with asterisks, e.g. "f***". If set to `false` or omitted, profanities
|
||||
// won't be filtered out.
|
||||
bool profanity_filter = 5;
|
||||
|
||||
// [Optional] If `false` or omitted, the recognizer will detect a single
|
||||
// spoken utterance, and it will cease recognition when the user stops
|
||||
// speaking. If `enable_endpointer_events` is `true`, it will return
|
||||
// `END_OF_UTTERANCE` when it detects that the user has stopped speaking.
|
||||
// In all cases, it will return no more than one `SpeechRecognitionResult`,
|
||||
// and set the `is_final` flag to `true`.
|
||||
//
|
||||
// If `true`, the recognizer will continue recognition (even if the user
|
||||
// pauses speaking) until the client closes the output stream (gRPC API) or
|
||||
// completes the POST data (REST API) or when the maximum time limit has been
|
||||
// reached. Multiple `SpeechRecognitionResult`s with the `is_final` flag set
|
||||
// to `true` may be returned to indicate that the recognizer will not return
|
||||
// any further hypotheses for that portion of the transcript.
|
||||
bool continuous = 6;
|
||||
|
||||
// [Optional] If this parameter is `true`, interim results may be returned as
|
||||
// they become available.
|
||||
// If `false` or omitted, only `is_final=true` result(s) are returned.
|
||||
bool interim_results = 7;
|
||||
|
||||
// [Optional] If this parameter is `true`, `EndpointerEvents` may be returned
|
||||
// as they become available.
|
||||
// If `false` or omitted, no `EndpointerEvents` are returned.
|
||||
bool enable_endpointer_events = 8;
|
||||
|
||||
// [Optional] URI that points to a file where the recognition result should
|
||||
// be stored in JSON format. If omitted or empty string, the recognition
|
||||
// result is returned in the response. Should be specified only for
|
||||
// `NonStreamingRecognize`. If specified in a `Recognize` request,
|
||||
// `Recognize` returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
||||
// If specified in a `NonStreamingRecognize` request,
|
||||
// `NonStreamingRecognize` returns immediately, and the output file
|
||||
// is created asynchronously once the audio processing completes.
|
||||
// Currently, only Google Cloud Storage URIs are supported, which must be
|
||||
// specified in the following format: `gs://bucket_name/object_name`
|
||||
// (other URI formats return [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
|
||||
// more information, see [Request URIs](/storage/docs/reference-uris).
|
||||
string output_uri = 9;
|
||||
|
||||
// [Optional] A means to provide context to assist the speech recognition.
|
||||
SpeechContext speech_context = 10;
|
||||
}
|
||||
|
||||
// Provides "hints" to the speech recognizer to favor specific words and phrases
|
||||
// in the results.
|
||||
message SpeechContext {
|
||||
// [Optional] A list of up to 50 phrases of up to 100 characters each to
|
||||
// provide words and phrases "hints" to the speech recognition so that it is
|
||||
// more likely to recognize them.
|
||||
repeated string phrases = 1;
|
||||
}
|
||||
|
||||
// Contains audio data in the encoding specified in the
|
||||
// `InitialRecognizeRequest`. Either `content` or `uri` must be supplied.
|
||||
// Supplying both or neither returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
|
||||
message AudioRequest {
|
||||
// The audio data bytes encoded as specified in
|
||||
// `InitialRecognizeRequest`. Note: as with all bytes fields, protobuffers
|
||||
// use a pure binary representation, whereas JSON representations use base64.
|
||||
bytes content = 1;
|
||||
|
||||
// URI that points to a file that contains audio data bytes as specified in
|
||||
// `InitialRecognizeRequest`. Currently, only Google Cloud Storage URIs are
|
||||
// supported, which must be specified in the following format:
|
||||
// `gs://bucket_name/object_name` (other URI formats return
|
||||
// [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
|
||||
// [Request URIs](/storage/docs/reference-uris).
|
||||
string uri = 2;
|
||||
}
|
||||
|
||||
// `NonStreamingRecognizeResponse` is the only message returned to the client by
|
||||
// `NonStreamingRecognize`. It contains the result as zero or more sequential
|
||||
// `RecognizeResponse` messages.
|
||||
//
|
||||
// Note that streaming `Recognize` will also return multiple `RecognizeResponse`
|
||||
// messages, but each message is individually streamed.
|
||||
message NonStreamingRecognizeResponse {
|
||||
// [Output-only] Sequential list of messages returned by the recognizer.
|
||||
repeated RecognizeResponse responses = 1;
|
||||
}
|
||||
|
||||
// `RecognizeResponse` is the only message type returned to the client.
|
||||
message RecognizeResponse {
|
||||
// Indicates the type of endpointer event.
|
||||
enum EndpointerEvent {
|
||||
// No endpointer event specified.
|
||||
ENDPOINTER_EVENT_UNSPECIFIED = 0;
|
||||
|
||||
// Speech has been detected in the audio stream.
|
||||
START_OF_SPEECH = 1;
|
||||
|
||||
// Speech has ceased to be detected in the audio stream.
|
||||
END_OF_SPEECH = 2;
|
||||
|
||||
// The end of the audio stream has been reached. and it is being processed.
|
||||
END_OF_AUDIO = 3;
|
||||
|
||||
// This event is only sent when continuous is `false`. It indicates that the
|
||||
// server has detected the end of the user's speech utterance and expects no
|
||||
// additional speech. Therefore, the server will not process additional
|
||||
// audio. The client should stop sending additional audio data.
|
||||
END_OF_UTTERANCE = 4;
|
||||
}
|
||||
|
||||
// [Output-only] If set, returns a [google.rpc.Status][] message that
|
||||
// specifies the error for the operation.
|
||||
google.rpc.Status error = 1;
|
||||
|
||||
// [Output-only] For `continuous=false`, this repeated list contains zero or
|
||||
// one result that corresponds to all of the audio processed so far. For
|
||||
// `continuous=true`, this repeated list contains zero or more results that
|
||||
// correspond to consecutive portions of the audio being processed.
|
||||
// In both cases, contains zero or one `is_final=true` result (the newly
|
||||
// settled portion), followed by zero or more `is_final=false` results.
|
||||
repeated SpeechRecognitionResult results = 2;
|
||||
|
||||
// [Output-only] Indicates the lowest index in the `results` array that has
|
||||
// changed. The repeated `SpeechRecognitionResult` results overwrite past
|
||||
// results at this index and higher.
|
||||
int32 result_index = 3;
|
||||
|
||||
// [Output-only] Indicates the type of endpointer event.
|
||||
EndpointerEvent endpoint = 4;
|
||||
}
|
||||
|
||||
// A speech recognition result corresponding to a portion of the audio.
|
||||
message SpeechRecognitionResult {
|
||||
// [Output-only] May contain one or more recognition hypotheses (up to the
|
||||
// maximum specified in `max_alternatives`).
|
||||
repeated SpeechRecognitionAlternative alternatives = 1;
|
||||
|
||||
// [Output-only] Set `true` if this is the final time the speech service will
|
||||
// return this particular `SpeechRecognitionResult`. If `false`, this
|
||||
// represents an interim result that may change.
|
||||
bool is_final = 2;
|
||||
|
||||
// [Output-only] An estimate of the probability that the recognizer will not
|
||||
// change its guess about this interim result. Values range from 0.0
|
||||
// (completely unstable) to 1.0 (completely stable). Note that this is not the
|
||||
// same as `confidence`, which estimates the probability that a recognition
|
||||
// result is correct.
|
||||
// This field is only provided for interim results (`is_final=false`).
|
||||
// The default of 0.0 is a sentinel value indicating stability was not set.
|
||||
float stability = 3;
|
||||
}
|
||||
|
||||
// Alternative hypotheses (a.k.a. n-best list).
|
||||
message SpeechRecognitionAlternative {
|
||||
// [Output-only] Transcript text representing the words that the user spoke.
|
||||
string transcript = 1;
|
||||
|
||||
// [Output-only] The confidence estimate between 0.0 and 1.0. A higher number
|
||||
// means the system is more confident that the recognition is correct.
|
||||
// This field is typically provided only for the top hypothesis. and only for
|
||||
// `is_final=true` results.
|
||||
// The default of 0.0 is a sentinel value indicating confidence was not set.
|
||||
float confidence = 2;
|
||||
}
|
||||
|
|
@ -1,21 +1,21 @@
|
|||
type: com.google.api.codegen.ConfigProto
|
||||
language_settings:
|
||||
java:
|
||||
package_name: com.google.cloud.speech.spi.v1
|
||||
package_name: com.google.cloud.speech.spi.v1beta1
|
||||
python:
|
||||
package_name: google.cloud.speech.v1
|
||||
package_name: google.cloud.speech.v1beta1
|
||||
go:
|
||||
package_name: cloud.google.com/go/speech/apiv1
|
||||
package_name: cloud.google.com/go/speech/apiv1beta1
|
||||
csharp:
|
||||
package_name: Google.Cloud.Speech.V1
|
||||
package_name: Google.Cloud.Speech.V1beta1
|
||||
ruby:
|
||||
package_name: Google::Cloud::Speech::V1
|
||||
package_name: Google::Cloud::Speech::V1beta1
|
||||
php:
|
||||
package_name: Google\Cloud\Speech\V1
|
||||
package_name: Google\Cloud\Speech\V1beta1
|
||||
nodejs:
|
||||
package_name: "@google-cloud/speech"
|
||||
interfaces:
|
||||
- name: google.cloud.speech.v1.Speech
|
||||
- name: google.cloud.speech.v1beta1.Speech
|
||||
collections: []
|
||||
retry_codes_def:
|
||||
- name: idempotent
|
||||
|
|
@ -34,20 +34,33 @@ interfaces:
|
|||
max_rpc_timeout_millis: 60000
|
||||
total_timeout_millis: 600000
|
||||
methods:
|
||||
- name: Recognize
|
||||
retry_codes_name: non_idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 60000
|
||||
- name: NonStreamingRecognize
|
||||
- name: SyncRecognize
|
||||
flattening:
|
||||
groups:
|
||||
- parameters:
|
||||
- initial_request
|
||||
- audio_request
|
||||
- config
|
||||
- audio
|
||||
required_fields:
|
||||
- initial_request
|
||||
- audio_request
|
||||
- config
|
||||
- audio
|
||||
request_object_method: true
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 60000
|
||||
- name: AsyncRecognize
|
||||
flattening:
|
||||
groups:
|
||||
- parameters:
|
||||
- config
|
||||
- audio
|
||||
required_fields:
|
||||
- config
|
||||
- audio
|
||||
request_object_method: true
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 60000
|
||||
- name: StreamingRecognize
|
||||
retry_codes_name: non_idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 60000
|
||||
Loading…
Reference in New Issue