From ff4a2047b3d66f38c9b22197c370ed0d02fc0238 Mon Sep 17 00:00:00 2001 From: Google APIs Date: Thu, 19 Dec 2019 16:17:28 -0800 Subject: [PATCH] Weekly library update. PiperOrigin-RevId: 286484215 --- google/cloud/dialogflow/v2/audio_config.proto | 226 ++++++++++++------ google/cloud/dialogflow/v2/context.proto | 7 + google/cloud/dialogflow/v2/entity_type.proto | 2 + google/cloud/dialogflow/v2/intent.proto | 5 +- google/cloud/dialogflow/v2/session.proto | 46 ++-- google/cloud/dialogflow/v2/webhook.proto | 13 + 6 files changed, 212 insertions(+), 87 deletions(-) diff --git a/google/cloud/dialogflow/v2/audio_config.proto b/google/cloud/dialogflow/v2/audio_config.proto index e0055bba..47ae9696 100644 --- a/google/cloud/dialogflow/v2/audio_config.proto +++ b/google/cloud/dialogflow/v2/audio_config.proto @@ -79,47 +79,32 @@ enum AudioEncoding { AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7; } -// Instructs the speech recognizer how to process the audio content. -message InputAudioConfig { - // Required. Audio encoding of the audio content to process. - AudioEncoding audio_encoding = 1; - - // Required. Sample rate (in Hertz) of the audio content sent in the query. - // Refer to - // [Cloud Speech API - // documentation](https://cloud.google.com/speech-to-text/docs/basics) for - // more details. - int32 sample_rate_hertz = 2; - - // Required. The language of the supplied audio. Dialogflow does not do - // translations. See [Language - // Support](https://cloud.google.com/dialogflow/docs/reference/language) - // for a list of the currently supported language codes. Note that queries in - // the same session do not necessarily need to specify the same language. - string language_code = 3; - +// Hints for the speech recognizer to help with recognition in a specific +// conversation state. +message SpeechContext { // Optional. A list of strings containing words and phrases that the speech // recognizer should recognize with higher likelihood. // - // See [the Cloud Speech - // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints) - // for more details. - repeated string phrase_hints = 4; + // This list can be used to: + // * improve accuracy for words and phrases you expect the user to say, + // e.g. typical commands for your Dialogflow agent + // * add additional words to the speech recognizer vocabulary + // * ... + // + // See the [Cloud Speech + // documentation](https://cloud.google.com/speech-to-text/quotas) for usage + // limits. + repeated string phrases = 1; - // Optional. Which variant of the [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use. - SpeechModelVariant model_variant = 10; - - // Optional. If `false` (default), recognition does not cease until the - // client closes the stream. - // If `true`, the recognizer will detect a single spoken utterance in input - // audio. Recognition ceases when it detects the audio's voice has - // stopped or paused. In this case, once a detected intent is received, the - // client should close the stream and start a new request with a new stream as - // needed. - // Note: This setting is relevant only for streaming methods. - // Note: When specified, InputAudioConfig.single_utterance takes precedence - // over StreamingDetectIntentRequest.single_utterance. - bool single_utterance = 8; + // Optional. Boost for this context compared to other contexts: + // * If the boost is positive, Dialogflow will increase the probability that + // the phrases in this context are recognized over similar sounding phrases. + // * If the boost is unspecified or non-positive, Dialogflow will not apply + // any boost. + // + // Dialogflow recommends that you use boosts in the range (0, 20] and that you + // find a value that fits your use case with binary search. + float boost = 2; } // Variant of the specified [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use. @@ -165,6 +150,124 @@ enum SpeechModelVariant { USE_ENHANCED = 3; } +// Information for a word recognized by the speech recognizer. +message SpeechWordInfo { + // The word this info is for. + string word = 3; + + // Time offset relative to the beginning of the audio that corresponds to the + // start of the spoken word. This is an experimental feature and the accuracy + // of the time offset can vary. + google.protobuf.Duration start_offset = 1; + + // Time offset relative to the beginning of the audio that corresponds to the + // end of the spoken word. This is an experimental feature and the accuracy of + // the time offset can vary. + google.protobuf.Duration end_offset = 2; + + // The Speech confidence between 0.0 and 1.0 for this word. A higher number + // indicates an estimated greater likelihood that the recognized word is + // correct. The default of 0.0 is a sentinel value indicating that confidence + // was not set. + // + // This field is not guaranteed to be fully stable over time for the same + // audio input. Users should also not rely on it to always be provided. + float confidence = 4; +} + +// Instructs the speech recognizer how to process the audio content. +message InputAudioConfig { + // Required. Audio encoding of the audio content to process. + AudioEncoding audio_encoding = 1; + + // Required. Sample rate (in Hertz) of the audio content sent in the query. + // Refer to + // [Cloud Speech API + // documentation](https://cloud.google.com/speech-to-text/docs/basics) for + // more details. + int32 sample_rate_hertz = 2; + + // Required. The language of the supplied audio. Dialogflow does not do + // translations. See [Language + // Support](https://cloud.google.com/dialogflow/docs/reference/language) + // for a list of the currently supported language codes. Note that queries in + // the same session do not necessarily need to specify the same language. + string language_code = 3; + + // Optional. If `true`, Dialogflow returns [SpeechWordInfo][google.cloud.dialogflow.v2.SpeechWordInfo] in + // [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult] with information about the recognized speech + // words, e.g. start and end time offsets. If false or unspecified, Speech + // doesn't return any word-level information. + bool enable_word_info = 13 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. A list of strings containing words and phrases that the speech + // recognizer should recognize with higher likelihood. + // + // See [the Cloud Speech + // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints) + // for more details. + // + // This field is deprecated. Please use [speech_contexts]() instead. If you + // specify both [phrase_hints]() and [speech_contexts](), Dialogflow will + // treat the [phrase_hints]() as a single additional [SpeechContext](). + repeated string phrase_hints = 4 [ + deprecated = true, + (google.api.field_behavior) = OPTIONAL + ]; + + // Optional. Context information to assist speech recognition. + // + // See [the Cloud Speech + // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints) + // for more details. + repeated SpeechContext speech_contexts = 11 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Which Speech model to select for the given request. Select the + // model best suited to your domain to get best results. If a model is not + // explicitly specified, then we auto-select a model based on the parameters + // in the InputAudioConfig. + // If enhanced speech model is enabled for the agent and an enhanced + // version of the specified model for the language does not exist, then the + // speech is recognized using the standard version of the specified model. + // Refer to + // [Cloud Speech API + // documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model) + // for more details. + string model = 7 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Which variant of the [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use. + SpeechModelVariant model_variant = 10; + + // Optional. If `false` (default), recognition does not cease until the + // client closes the stream. + // If `true`, the recognizer will detect a single spoken utterance in input + // audio. Recognition ceases when it detects the audio's voice has + // stopped or paused. In this case, once a detected intent is received, the + // client should close the stream and start a new request with a new stream as + // needed. + // Note: This setting is relevant only for streaming methods. + // Note: When specified, InputAudioConfig.single_utterance takes precedence + // over StreamingDetectIntentRequest.single_utterance. + bool single_utterance = 8; +} + +// Gender of the voice as described in +// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice). +enum SsmlVoiceGender { + // An unspecified gender, which means that the client doesn't care which + // gender the selected voice will have. + SSML_VOICE_GENDER_UNSPECIFIED = 0; + + // A male voice. + SSML_VOICE_GENDER_MALE = 1; + + // A female voice. + SSML_VOICE_GENDER_FEMALE = 2; + + // A gender-neutral voice. + SSML_VOICE_GENDER_NEUTRAL = 3; +} + // Description of which voice to use for speech synthesis. message VoiceSelectionParams { // Optional. The name of the voice. If not set, the service will choose a @@ -212,39 +315,6 @@ message SynthesizeSpeechConfig { VoiceSelectionParams voice = 4; } -// Gender of the voice as described in -// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice). -enum SsmlVoiceGender { - // An unspecified gender, which means that the client doesn't care which - // gender the selected voice will have. - SSML_VOICE_GENDER_UNSPECIFIED = 0; - - // A male voice. - SSML_VOICE_GENDER_MALE = 1; - - // A female voice. - SSML_VOICE_GENDER_FEMALE = 2; - - // A gender-neutral voice. - SSML_VOICE_GENDER_NEUTRAL = 3; -} - -// Instructs the speech synthesizer on how to generate the output audio content. -message OutputAudioConfig { - // Required. Audio encoding of the synthesized audio content. - OutputAudioEncoding audio_encoding = 1; - - // Optional. The synthesis sample rate (in hertz) for this audio. If not - // provided, then the synthesizer will use the default sample rate based on - // the audio encoding. If this is different from the voice's natural sample - // rate, then the synthesizer will honor this request by converting to the - // desired sample rate (which might result in worse audio quality). - int32 sample_rate_hertz = 2; - - // Optional. Configuration of how speech should be synthesized. - SynthesizeSpeechConfig synthesize_speech_config = 3; -} - // Audio encoding of the output audio format in Text-To-Speech. enum OutputAudioEncoding { // Not specified. @@ -263,3 +333,19 @@ enum OutputAudioEncoding { // than MP3 while using approximately the same bitrate. OUTPUT_AUDIO_ENCODING_OGG_OPUS = 3; } + +// Instructs the speech synthesizer on how to generate the output audio content. +message OutputAudioConfig { + // Required. Audio encoding of the synthesized audio content. + OutputAudioEncoding audio_encoding = 1; + + // Optional. The synthesis sample rate (in hertz) for this audio. If not + // provided, then the synthesizer will use the default sample rate based on + // the audio encoding. If this is different from the voice's natural sample + // rate, then the synthesizer will honor this request by converting to the + // desired sample rate (which might result in worse audio quality). + int32 sample_rate_hertz = 2; + + // Optional. Configuration of how speech should be synthesized. + SynthesizeSpeechConfig synthesize_speech_config = 3; +} diff --git a/google/cloud/dialogflow/v2/context.proto b/google/cloud/dialogflow/v2/context.proto index 33dd5673..23aeae8c 100644 --- a/google/cloud/dialogflow/v2/context.proto +++ b/google/cloud/dialogflow/v2/context.proto @@ -117,6 +117,13 @@ message Context { // // The `Context ID` is always converted to lowercase, may only contain // characters in [a-zA-Z0-9_-%] and may be at most 250 bytes long. + // + // The following context names are reserved for internal use by Dialogflow. + // You should not use these contexts or create contexts with these names: + // + // * `__system_counters__` + // * `*_id_dialog_context` + // * `*_dialog_params_size` string name = 1 [(google.api.field_behavior) = REQUIRED]; // Optional. The number of conversational query requests after which the diff --git a/google/cloud/dialogflow/v2/entity_type.proto b/google/cloud/dialogflow/v2/entity_type.proto index 969dff0a..419d438c 100644 --- a/google/cloud/dialogflow/v2/entity_type.proto +++ b/google/cloud/dialogflow/v2/entity_type.proto @@ -161,6 +161,7 @@ service EntityTypes { // method does not affect entities in the entity type that aren't explicitly // specified in the request. // + // // Operation rpc BatchUpdateEntities(BatchUpdateEntitiesRequest) returns (google.longrunning.Operation) { option (google.api.http) = { @@ -177,6 +178,7 @@ service EntityTypes { // Deletes entities in the specified entity type. // + // // Operation rpc BatchDeleteEntities(BatchDeleteEntitiesRequest) returns (google.longrunning.Operation) { option (google.api.http) = { diff --git a/google/cloud/dialogflow/v2/intent.proto b/google/cloud/dialogflow/v2/intent.proto index e9ee5f5f..ed1fec3d 100644 --- a/google/cloud/dialogflow/v2/intent.proto +++ b/google/cloud/dialogflow/v2/intent.proto @@ -898,8 +898,9 @@ message Intent { // Read-only after creation. The unique identifier of the parent intent in the // chain of followup intents. You can set this field when creating an intent, - // for example with [CreateIntent][] or [BatchUpdateIntents][], in order to - // make this intent a followup intent. + // for example with [CreateIntent][google.cloud.dialogflow.v2.Intents.CreateIntent] or + // [BatchUpdateIntents][google.cloud.dialogflow.v2.Intents.BatchUpdateIntents], in order to make this + // intent a followup intent. // // It identifies the parent followup intent. // Format: `projects//agent/intents/`. diff --git a/google/cloud/dialogflow/v2/session.proto b/google/cloud/dialogflow/v2/session.proto index 86c89323..2688d6c4 100644 --- a/google/cloud/dialogflow/v2/session.proto +++ b/google/cloud/dialogflow/v2/session.proto @@ -259,9 +259,13 @@ message QueryResult { // the greatest `knowledgeAnswers.match_confidence` value in the list. float intent_detection_confidence = 12; - // The free-form diagnostic info. For example, this field could contain - // webhook call latency. The string keys of the Struct's fields map can change - // without notice. + // Free-form diagnostic information for the associated detect intent request. + // The fields of this data can change without notice, so you should not write + // code that depends on its structure. + // The data may contain: + // + // - webhook call latency + // - webhook errors google.protobuf.Struct diagnostic_info = 14; // The sentiment analysis result, which depends on the @@ -270,23 +274,26 @@ message QueryResult { } // The top-level message sent by the client to the -// [StreamingDetectIntent][] method. +// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent] method. // // Multiple request messages should be sent in order: // -// 1. The first message must contain [StreamingDetectIntentRequest.session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session], -// [StreamingDetectIntentRequest.query_input] plus optionally -// [StreamingDetectIntentRequest.query_params]. If the client wants to -// receive an audio response, it should also contain -// [StreamingDetectIntentRequest.output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]. The message -// must not contain [StreamingDetectIntentRequest.input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]. -// 2. If [StreamingDetectIntentRequest.query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] was set to -// [StreamingDetectIntentRequest.query_input.audio_config][], all subsequent -// messages must contain [StreamingDetectIntentRequest.input_audio] to -// continue with Speech recognition. +// 1. The first message must contain +// [session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session], +// [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] plus optionally +// [query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params]. If the client +// wants to receive an audio response, it should also contain +// [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]. +// The message must not contain +// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]. +// 2. If [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] was set to +// [query_input.audio_config][google.cloud.dialogflow.v2.InputAudioConfig], all subsequent +// messages must contain +// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio] to continue with +// Speech recognition. // If you decide to rather detect an intent from text input after you // already started Speech recognition, please send a message with -// [StreamingDetectIntentRequest.query_input.text][]. +// [query_input.text][google.cloud.dialogflow.v2.QueryInput.text]. // // However, note that: // @@ -453,6 +460,15 @@ message StreamingRecognitionResult { // This field is typically only provided if `is_final` is true and you should // not rely on it being accurate or even set. float confidence = 4; + + // Word-specific information for the words recognized by Speech in + // [transcript][google.cloud.dialogflow.v2.StreamingRecognitionResult.transcript]. Populated if and only if `message_type` = `TRANSCRIPT` and + // [InputAudioConfig.enable_word_info] is set. + repeated SpeechWordInfo speech_word_info = 7; + + // Time offset of the end of this Speech recognition result relative to the + // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`. + google.protobuf.Duration speech_end_offset = 8; } // Represents the natural language text to be processed. diff --git a/google/cloud/dialogflow/v2/webhook.proto b/google/cloud/dialogflow/v2/webhook.proto index 377eaa71..3e0dadf7 100644 --- a/google/cloud/dialogflow/v2/webhook.proto +++ b/google/cloud/dialogflow/v2/webhook.proto @@ -55,6 +55,19 @@ message WebhookRequest { } // The response message for a webhook call. +// +// This response is validated by the Dialogflow server. If validation fails, +// an error will be returned in the [QueryResult.diagnostic_info][google.cloud.dialogflow.v2.QueryResult.diagnostic_info] field. +// Setting JSON fields to an empty value with the wrong type is a common error. +// To avoid this error: +// +// - Use `""` for empty strings +// - Use `{}` or `null` for empty objects +// - Use `[]` or `null` for empty arrays +// +// For more information, see the +// [Protocol Buffers Language +// Guide](https://developers.google.com/protocol-buffers/docs/proto3#json). message WebhookResponse { // Optional. The text to be shown on the screen. This value is passed directly // to `QueryResult.fulfillment_text`.