parent
192c140298
commit
ff4a2047b3
|
|
@ -79,47 +79,32 @@ enum AudioEncoding {
|
|||
AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7;
|
||||
}
|
||||
|
||||
// Instructs the speech recognizer how to process the audio content.
|
||||
message InputAudioConfig {
|
||||
// Required. Audio encoding of the audio content to process.
|
||||
AudioEncoding audio_encoding = 1;
|
||||
|
||||
// Required. Sample rate (in Hertz) of the audio content sent in the query.
|
||||
// Refer to
|
||||
// [Cloud Speech API
|
||||
// documentation](https://cloud.google.com/speech-to-text/docs/basics) for
|
||||
// more details.
|
||||
int32 sample_rate_hertz = 2;
|
||||
|
||||
// Required. The language of the supplied audio. Dialogflow does not do
|
||||
// translations. See [Language
|
||||
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
|
||||
// for a list of the currently supported language codes. Note that queries in
|
||||
// the same session do not necessarily need to specify the same language.
|
||||
string language_code = 3;
|
||||
|
||||
// Hints for the speech recognizer to help with recognition in a specific
|
||||
// conversation state.
|
||||
message SpeechContext {
|
||||
// Optional. A list of strings containing words and phrases that the speech
|
||||
// recognizer should recognize with higher likelihood.
|
||||
//
|
||||
// See [the Cloud Speech
|
||||
// documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints)
|
||||
// for more details.
|
||||
repeated string phrase_hints = 4;
|
||||
// This list can be used to:
|
||||
// * improve accuracy for words and phrases you expect the user to say,
|
||||
// e.g. typical commands for your Dialogflow agent
|
||||
// * add additional words to the speech recognizer vocabulary
|
||||
// * ...
|
||||
//
|
||||
// See the [Cloud Speech
|
||||
// documentation](https://cloud.google.com/speech-to-text/quotas) for usage
|
||||
// limits.
|
||||
repeated string phrases = 1;
|
||||
|
||||
// Optional. Which variant of the [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use.
|
||||
SpeechModelVariant model_variant = 10;
|
||||
|
||||
// Optional. If `false` (default), recognition does not cease until the
|
||||
// client closes the stream.
|
||||
// If `true`, the recognizer will detect a single spoken utterance in input
|
||||
// audio. Recognition ceases when it detects the audio's voice has
|
||||
// stopped or paused. In this case, once a detected intent is received, the
|
||||
// client should close the stream and start a new request with a new stream as
|
||||
// needed.
|
||||
// Note: This setting is relevant only for streaming methods.
|
||||
// Note: When specified, InputAudioConfig.single_utterance takes precedence
|
||||
// over StreamingDetectIntentRequest.single_utterance.
|
||||
bool single_utterance = 8;
|
||||
// Optional. Boost for this context compared to other contexts:
|
||||
// * If the boost is positive, Dialogflow will increase the probability that
|
||||
// the phrases in this context are recognized over similar sounding phrases.
|
||||
// * If the boost is unspecified or non-positive, Dialogflow will not apply
|
||||
// any boost.
|
||||
//
|
||||
// Dialogflow recommends that you use boosts in the range (0, 20] and that you
|
||||
// find a value that fits your use case with binary search.
|
||||
float boost = 2;
|
||||
}
|
||||
|
||||
// Variant of the specified [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use.
|
||||
|
|
@ -165,6 +150,124 @@ enum SpeechModelVariant {
|
|||
USE_ENHANCED = 3;
|
||||
}
|
||||
|
||||
// Word-level details reported by the speech recognizer for one recognized
// word.
message SpeechWordInfo {
  // The recognized word that this message describes.
  string word = 3;

  // Offset from the beginning of the audio to the start of the spoken word.
  // Experimental: the accuracy of this time offset can vary.
  google.protobuf.Duration start_offset = 1;

  // Offset from the beginning of the audio to the end of the spoken word.
  // Experimental: the accuracy of this time offset can vary.
  google.protobuf.Duration end_offset = 2;

  // Speech confidence for this word, between 0.0 and 1.0. Higher values
  // indicate a greater estimated likelihood that the recognized word is
  // correct. The default of 0.0 is a sentinel value meaning that confidence
  // was not set.
  //
  // This value is not guaranteed to be fully stable over time for the same
  // audio input, and users should not rely on it always being provided.
  float confidence = 4;
}
|
||||
|
||||
// Instructs the speech recognizer how to process the audio content.
message InputAudioConfig {
  // Required. Audio encoding of the audio content to process.
  AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Sample rate (in Hertz) of the audio content sent in the query.
  // Refer to
  // [Cloud Speech API
  // documentation](https://cloud.google.com/speech-to-text/docs/basics) for
  // more details.
  int32 sample_rate_hertz = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of the supplied audio. Dialogflow does not do
  // translations. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. If `true`, Dialogflow returns
  // [SpeechWordInfo][google.cloud.dialogflow.v2.SpeechWordInfo] in
  // [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult]
  // with information about the recognized speech words, e.g. start and end
  // time offsets. If `false` or unspecified, Speech doesn't return any
  // word-level information.
  bool enable_word_info = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of strings containing words and phrases that the speech
  // recognizer should recognize with higher likelihood.
  //
  // See [the Cloud Speech
  // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints)
  // for more details.
  //
  // This field is deprecated. Please use
  // [speech_contexts][google.cloud.dialogflow.v2.InputAudioConfig.speech_contexts]
  // instead. If you specify both
  // [phrase_hints][google.cloud.dialogflow.v2.InputAudioConfig.phrase_hints]
  // and
  // [speech_contexts][google.cloud.dialogflow.v2.InputAudioConfig.speech_contexts],
  // Dialogflow will treat the
  // [phrase_hints][google.cloud.dialogflow.v2.InputAudioConfig.phrase_hints]
  // as a single additional
  // [SpeechContext][google.cloud.dialogflow.v2.SpeechContext].
  repeated string phrase_hints = 4 [
    deprecated = true,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Optional. Context information to assist speech recognition.
  //
  // See [the Cloud Speech
  // documentation](https://cloud.google.com/speech-to-text/docs/basics#phrase-hints)
  // for more details.
  repeated SpeechContext speech_contexts = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Which Speech model to select for the given request. Select the
  // model best suited to your domain to get best results. If a model is not
  // explicitly specified, then we auto-select a model based on the parameters
  // in the InputAudioConfig.
  // If enhanced speech model is enabled for the agent and an enhanced
  // version of the specified model for the language does not exist, then the
  // speech is recognized using the standard version of the specified model.
  // Refer to
  // [Cloud Speech API
  // documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model)
  // for more details.
  string model = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Which variant of the
  // [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use.
  SpeechModelVariant model_variant = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If `false` (default), recognition does not cease until the
  // client closes the stream.
  // If `true`, the recognizer will detect a single spoken utterance in input
  // audio. Recognition ceases when it detects the audio's voice has
  // stopped or paused. In this case, once a detected intent is received, the
  // client should close the stream and start a new request with a new stream as
  // needed.
  // Note: This setting is relevant only for streaming methods.
  // Note: When specified, InputAudioConfig.single_utterance takes precedence
  // over StreamingDetectIntentRequest.single_utterance.
  bool single_utterance = 8 [(google.api.field_behavior) = OPTIONAL];
}
|
||||
|
||||
// Voice gender, as described in
// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
enum SsmlVoiceGender {
  // No gender specified: the client doesn't care which gender the selected
  // voice will have.
  SSML_VOICE_GENDER_UNSPECIFIED = 0;

  // Male voice.
  SSML_VOICE_GENDER_MALE = 1;

  // Female voice.
  SSML_VOICE_GENDER_FEMALE = 2;

  // Gender-neutral voice.
  SSML_VOICE_GENDER_NEUTRAL = 3;
}
|
||||
|
||||
// Description of which voice to use for speech synthesis.
|
||||
message VoiceSelectionParams {
|
||||
// Optional. The name of the voice. If not set, the service will choose a
|
||||
|
|
@ -212,39 +315,6 @@ message SynthesizeSpeechConfig {
|
|||
VoiceSelectionParams voice = 4;
|
||||
}
|
||||
|
||||
// Voice gender, as described in
// [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice).
enum SsmlVoiceGender {
  // No gender specified: the client doesn't care which gender the selected
  // voice will have.
  SSML_VOICE_GENDER_UNSPECIFIED = 0;

  // Male voice.
  SSML_VOICE_GENDER_MALE = 1;

  // Female voice.
  SSML_VOICE_GENDER_FEMALE = 2;

  // Gender-neutral voice.
  SSML_VOICE_GENDER_NEUTRAL = 3;
}
|
||||
|
||||
// Tells the speech synthesizer how to generate the output audio content.
message OutputAudioConfig {
  // Required. The audio encoding of the synthesized audio content.
  OutputAudioEncoding audio_encoding = 1;

  // Optional. Sample rate (in hertz) at which to synthesize this audio. When
  // not provided, the synthesizer uses the default sample rate based on the
  // audio encoding. When the requested rate differs from the voice's natural
  // sample rate, the synthesizer honors the request by converting to the
  // desired sample rate, which might result in worse audio quality.
  int32 sample_rate_hertz = 2;

  // Optional. Settings that control how speech should be synthesized.
  SynthesizeSpeechConfig synthesize_speech_config = 3;
}
|
||||
|
||||
// Audio encoding of the output audio format in Text-To-Speech.
|
||||
enum OutputAudioEncoding {
|
||||
// Not specified.
|
||||
|
|
@ -263,3 +333,19 @@ enum OutputAudioEncoding {
|
|||
// than MP3 while using approximately the same bitrate.
|
||||
OUTPUT_AUDIO_ENCODING_OGG_OPUS = 3;
|
||||
}
|
||||
|
||||
// Tells the speech synthesizer how to generate the output audio content.
message OutputAudioConfig {
  // Required. The audio encoding of the synthesized audio content.
  OutputAudioEncoding audio_encoding = 1;

  // Optional. Sample rate (in hertz) at which to synthesize this audio. When
  // not provided, the synthesizer uses the default sample rate based on the
  // audio encoding. When the requested rate differs from the voice's natural
  // sample rate, the synthesizer honors the request by converting to the
  // desired sample rate, which might result in worse audio quality.
  int32 sample_rate_hertz = 2;

  // Optional. Settings that control how speech should be synthesized.
  SynthesizeSpeechConfig synthesize_speech_config = 3;
}
|
||||
|
|
|
|||
|
|
@ -117,6 +117,13 @@ message Context {
|
|||
//
|
||||
// The `Context ID` is always converted to lowercase, may only contain
|
||||
// characters in [a-zA-Z0-9_%-] and may be at most 250 bytes long.
|
||||
//
|
||||
// The following context names are reserved for internal use by Dialogflow.
|
||||
// You should not use these contexts or create contexts with these names:
|
||||
//
|
||||
// * `__system_counters__`
|
||||
// * `*_id_dialog_context`
|
||||
// * `*_dialog_params_size`
|
||||
string name = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Optional. The number of conversational query requests after which the
|
||||
|
|
|
|||
|
|
@ -161,6 +161,7 @@ service EntityTypes {
|
|||
// method does not affect entities in the entity type that aren't explicitly
|
||||
// specified in the request.
|
||||
//
|
||||
//
|
||||
// Operation <response: [google.protobuf.Empty][google.protobuf.Empty]>
|
||||
rpc BatchUpdateEntities(BatchUpdateEntitiesRequest) returns (google.longrunning.Operation) {
|
||||
option (google.api.http) = {
|
||||
|
|
@ -177,6 +178,7 @@ service EntityTypes {
|
|||
|
||||
// Deletes entities in the specified entity type.
|
||||
//
|
||||
//
|
||||
// Operation <response: [google.protobuf.Empty][google.protobuf.Empty]>
|
||||
rpc BatchDeleteEntities(BatchDeleteEntitiesRequest) returns (google.longrunning.Operation) {
|
||||
option (google.api.http) = {
|
||||
|
|
|
|||
|
|
@ -898,8 +898,9 @@ message Intent {
|
|||
|
||||
// Read-only after creation. The unique identifier of the parent intent in the
|
||||
// chain of followup intents. You can set this field when creating an intent,
|
||||
// for example with [CreateIntent][] or [BatchUpdateIntents][], in order to
|
||||
// make this intent a followup intent.
|
||||
// for example with [CreateIntent][google.cloud.dialogflow.v2.Intents.CreateIntent] or
|
||||
// [BatchUpdateIntents][google.cloud.dialogflow.v2.Intents.BatchUpdateIntents], in order to make this
|
||||
// intent a followup intent.
|
||||
//
|
||||
// It identifies the parent followup intent.
|
||||
// Format: `projects/<Project ID>/agent/intents/<Intent ID>`.
|
||||
|
|
|
|||
|
|
@ -259,9 +259,13 @@ message QueryResult {
|
|||
// the greatest `knowledgeAnswers.match_confidence` value in the list.
|
||||
float intent_detection_confidence = 12;
|
||||
|
||||
// The free-form diagnostic info. For example, this field could contain
|
||||
// webhook call latency. The string keys of the Struct's fields map can change
|
||||
// without notice.
|
||||
// Free-form diagnostic information for the associated detect intent request.
|
||||
// The fields of this data can change without notice, so you should not write
|
||||
// code that depends on its structure.
|
||||
// The data may contain:
|
||||
//
|
||||
// - webhook call latency
|
||||
// - webhook errors
|
||||
google.protobuf.Struct diagnostic_info = 14;
|
||||
|
||||
// The sentiment analysis result, which depends on the
|
||||
|
|
@ -270,23 +274,26 @@ message QueryResult {
|
|||
}
|
||||
|
||||
// The top-level message sent by the client to the
|
||||
// [StreamingDetectIntent][] method.
|
||||
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent] method.
|
||||
//
|
||||
// Multiple request messages should be sent in order:
|
||||
//
|
||||
// 1. The first message must contain [StreamingDetectIntentRequest.session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
|
||||
// [StreamingDetectIntentRequest.query_input] plus optionally
|
||||
// [StreamingDetectIntentRequest.query_params]. If the client wants to
|
||||
// receive an audio response, it should also contain
|
||||
// [StreamingDetectIntentRequest.output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]. The message
|
||||
// must not contain [StreamingDetectIntentRequest.input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
|
||||
// 2. If [StreamingDetectIntentRequest.query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] was set to
|
||||
// [StreamingDetectIntentRequest.query_input.audio_config][], all subsequent
|
||||
// messages must contain [StreamingDetectIntentRequest.input_audio] to
|
||||
// continue with Speech recognition.
|
||||
// 1. The first message must contain
|
||||
// [session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
|
||||
// [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] plus optionally
|
||||
// [query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params]. If the client
|
||||
// wants to receive an audio response, it should also contain
|
||||
// [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config].
|
||||
// The message must not contain
|
||||
// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
|
||||
// 2. If [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input] was set to
|
||||
// [query_input.audio_config][google.cloud.dialogflow.v2.InputAudioConfig], all subsequent
|
||||
// messages must contain
|
||||
// [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio] to continue with
|
||||
// Speech recognition.
|
||||
// If you instead decide to detect an intent from text input after you have
|
||||
// already started Speech recognition, please send a message with
|
||||
// [StreamingDetectIntentRequest.query_input.text][].
|
||||
// [query_input.text][google.cloud.dialogflow.v2.QueryInput.text].
|
||||
//
|
||||
// However, note that:
|
||||
//
|
||||
|
|
@ -453,6 +460,15 @@ message StreamingRecognitionResult {
|
|||
// This field is typically only provided if `is_final` is true and you should
|
||||
// not rely on it being accurate or even set.
|
||||
float confidence = 4;
|
||||
|
||||
// Word-specific information for the words recognized by Speech in
|
||||
// [transcript][google.cloud.dialogflow.v2.StreamingRecognitionResult.transcript]. Populated if and only if `message_type` = `TRANSCRIPT` and
|
||||
// [InputAudioConfig.enable_word_info][google.cloud.dialogflow.v2.InputAudioConfig.enable_word_info] is set.
|
||||
repeated SpeechWordInfo speech_word_info = 7;
|
||||
|
||||
// Time offset of the end of this Speech recognition result relative to the
|
||||
// beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
|
||||
google.protobuf.Duration speech_end_offset = 8;
|
||||
}
|
||||
|
||||
// Represents the natural language text to be processed.
|
||||
|
|
|
|||
|
|
@ -55,6 +55,19 @@ message WebhookRequest {
|
|||
}
|
||||
|
||||
// The response message for a webhook call.
|
||||
//
|
||||
// This response is validated by the Dialogflow server. If validation fails,
|
||||
// an error will be returned in the [QueryResult.diagnostic_info][google.cloud.dialogflow.v2.QueryResult.diagnostic_info] field.
|
||||
// Setting JSON fields to an empty value with the wrong type is a common error.
|
||||
// To avoid this error:
|
||||
//
|
||||
// - Use `""` for empty strings
|
||||
// - Use `{}` or `null` for empty objects
|
||||
// - Use `[]` or `null` for empty arrays
|
||||
//
|
||||
// For more information, see the
|
||||
// [Protocol Buffers Language
|
||||
// Guide](https://developers.google.com/protocol-buffers/docs/proto3#json).
|
||||
message WebhookResponse {
|
||||
// Optional. The text to be shown on the screen. This value is passed directly
|
||||
// to `QueryResult.fulfillment_text`.
|
||||
|
|
|
|||
Loading…
Reference in New Issue