diff --git a/google/cloud/dialogflow/v2/agent.proto b/google/cloud/dialogflow/v2/agent.proto index 23a3d44f..9459029a 100644 --- a/google/cloud/dialogflow/v2/agent.proto +++ b/google/cloud/dialogflow/v2/agent.proto @@ -33,34 +33,7 @@ option java_outer_classname = "AgentProto"; option java_package = "com.google.cloud.dialogflow.v2"; option objc_class_prefix = "DF"; -// Agents are best described as Natural Language Understanding (NLU) modules -// that transform user requests into actionable data. You can include agents -// in your app, product, or service to determine user intent and respond to the -// user in a natural way. -// -// After you create an agent, you can add [Intents][google.cloud.dialogflow.v2.Intents], [Contexts][google.cloud.dialogflow.v2.Contexts], -// [Entity Types][google.cloud.dialogflow.v2.EntityTypes], [Webhooks][google.cloud.dialogflow.v2.WebhookRequest], and so on to -// manage the flow of a conversation and match user input to predefined intents -// and actions. -// -// You can create an agent using both Dialogflow Standard Edition and -// Dialogflow Enterprise Edition. For details, see -// [Dialogflow -// Editions](https://cloud.google.com/dialogflow/docs/editions). -// -// You can save your agent for backup or versioning by exporting the agent by -// using the [ExportAgent][google.cloud.dialogflow.v2.Agents.ExportAgent] method. You can import a saved -// agent by using the [ImportAgent][google.cloud.dialogflow.v2.Agents.ImportAgent] method. -// -// Dialogflow provides several -// [prebuilt -// agents](https://cloud.google.com/dialogflow/docs/agents-prebuilt) -// for common conversation scenarios such as determining a date and time, -// converting currency, and so on. -// -// For more information about agents, see the -// [Dialogflow -// documentation](https://cloud.google.com/dialogflow/docs/agents-overview). +// Service for managing [Agents][google.cloud.dialogflow.v2.Agent]. service Agents { option (google.api.default_host) = "dialogflow.googleapis.com"; option (google.api.oauth_scopes) = @@ -180,7 +153,16 @@ service Agents { } } -// Represents a conversational agent. +// A Dialogflow agent is a virtual agent that handles conversations with your +// end-users. It is a natural language understanding module that understands the +// nuances of human language. Dialogflow translates end-user text or audio +// during a conversation to structured data that your apps and services can +// understand. You design and build a Dialogflow agent to handle the types of +// conversations required for your system. +// +// For more information about agents, see the +// [Agents +// documentation](https://cloud.google.com/dialogflow/docs/agents-overview). message Agent { option (google.api.resource) = { type: "dialogflow.googleapis.com/Agent" diff --git a/google/cloud/dialogflow/v2/audio_config.proto b/google/cloud/dialogflow/v2/audio_config.proto index 975ea69a..ec0602f5 100644 --- a/google/cloud/dialogflow/v2/audio_config.proto +++ b/google/cloud/dialogflow/v2/audio_config.proto @@ -29,6 +29,35 @@ option java_outer_classname = "AudioConfigProto"; option java_package = "com.google.cloud.dialogflow.v2"; option objc_class_prefix = "DF"; +// Hints for the speech recognizer to help with recognition in a specific +// conversation state. +message SpeechContext { + // Optional. A list of strings containing words and phrases that the speech + // recognizer should recognize with higher likelihood. + // + // This list can be used to: + // * improve accuracy for words and phrases you expect the user to say, + // e.g. typical commands for your Dialogflow agent + // * add additional words to the speech recognizer vocabulary + // * ... + // + // See the [Cloud Speech + // documentation](https://cloud.google.com/speech-to-text/quotas) for usage + // limits. + repeated string phrases = 1; + + // Optional. Boost for this context compared to other contexts: + // + // * If the boost is positive, Dialogflow will increase the probability that + // the phrases in this context are recognized over similar sounding phrases. + // * If the boost is unspecified or non-positive, Dialogflow will not apply + // any boost. + // + // Dialogflow recommends that you use boosts in the range (0, 20] and that you + // find a value that fits your use case with binary search. + float boost = 2; +} + // Audio encoding of the audio content sent in the conversational query request. // Refer to the // [Cloud Speech API @@ -78,33 +107,29 @@ enum AudioEncoding { AUDIO_ENCODING_SPEEX_WITH_HEADER_BYTE = 7; } -// Hints for the speech recognizer to help with recognition in a specific -// conversation state. -message SpeechContext { - // Optional. A list of strings containing words and phrases that the speech - // recognizer should recognize with higher likelihood. - // - // This list can be used to: - // * improve accuracy for words and phrases you expect the user to say, - // e.g. typical commands for your Dialogflow agent - // * add additional words to the speech recognizer vocabulary - // * ... - // - // See the [Cloud Speech - // documentation](https://cloud.google.com/speech-to-text/quotas) for usage - // limits. - repeated string phrases = 1; +// Information for a word recognized by the speech recognizer. +message SpeechWordInfo { + // The word this info is for. + string word = 3; - // Optional. Boost for this context compared to other contexts: + // Time offset relative to the beginning of the audio that corresponds to the + // start of the spoken word. This is an experimental feature and the accuracy + // of the time offset can vary. + google.protobuf.Duration start_offset = 1; + + // Time offset relative to the beginning of the audio that corresponds to the + // end of the spoken word. This is an experimental feature and the accuracy of + // the time offset can vary. + google.protobuf.Duration end_offset = 2; + + // The Speech confidence between 0.0 and 1.0 for this word. A higher number + // indicates an estimated greater likelihood that the recognized word is + // correct. The default of 0.0 is a sentinel value indicating that confidence + // was not set. // - // * If the boost is positive, Dialogflow will increase the probability that - // the phrases in this context are recognized over similar sounding phrases. - // * If the boost is unspecified or non-positive, Dialogflow will not apply - // any boost. - // - // Dialogflow recommends that you use boosts in the range (0, 20] and that you - // find a value that fits your use case with binary search. - float boost = 2; + // This field is not guaranteed to be fully stable over time for the same + // audio input. Users should also not rely on it to always be provided. + float confidence = 4; } // Variant of the specified [Speech model][google.cloud.dialogflow.v2.InputAudioConfig.model] to use. @@ -150,31 +175,6 @@ enum SpeechModelVariant { USE_ENHANCED = 3; } -// Information for a word recognized by the speech recognizer. -message SpeechWordInfo { - // The word this info is for. - string word = 3; - - // Time offset relative to the beginning of the audio that corresponds to the - // start of the spoken word. This is an experimental feature and the accuracy - // of the time offset can vary. - google.protobuf.Duration start_offset = 1; - - // Time offset relative to the beginning of the audio that corresponds to the - // end of the spoken word. This is an experimental feature and the accuracy of - // the time offset can vary. - google.protobuf.Duration end_offset = 2; - - // The Speech confidence between 0.0 and 1.0 for this word. A higher number - // indicates an estimated greater likelihood that the recognized word is - // correct. The default of 0.0 is a sentinel value indicating that confidence - // was not set. - // - // This field is not guaranteed to be fully stable over time for the same - // audio input. Users should also not rely on it to always be provided. - float confidence = 4; -} - // Instructs the speech recognizer how to process the audio content. message InputAudioConfig { // Required. Audio encoding of the audio content to process. @@ -248,6 +248,21 @@ message InputAudioConfig { bool single_utterance = 8; } +// Description of which voice to use for speech synthesis. +message VoiceSelectionParams { + // Optional. The name of the voice. If not set, the service will choose a + // voice based on the other parameters such as language_code and + // [ssml_gender][google.cloud.dialogflow.v2.VoiceSelectionParams.ssml_gender]. + string name = 1; + + // Optional. The preferred gender of the voice. If not set, the service will + // choose a voice based on the other parameters such as language_code and + // [name][google.cloud.dialogflow.v2.VoiceSelectionParams.name]. Note that this is only a preference, not requirement. If a + // voice of the appropriate gender is not available, the synthesizer should + // substitute a voice with a different gender rather than failing the request. + SsmlVoiceGender ssml_gender = 2; +} + // Gender of the voice as described in // [SSML voice element](https://www.w3.org/TR/speech-synthesis11/#edef_voice). enum SsmlVoiceGender { @@ -265,21 +280,6 @@ enum SsmlVoiceGender { SSML_VOICE_GENDER_NEUTRAL = 3; } -// Description of which voice to use for speech synthesis. -message VoiceSelectionParams { - // Optional. The name of the voice. If not set, the service will choose a - // voice based on the other parameters such as language_code and - // [ssml_gender][google.cloud.dialogflow.v2.VoiceSelectionParams.ssml_gender]. - string name = 1; - - // Optional. The preferred gender of the voice. If not set, the service will - // choose a voice based on the other parameters such as language_code and - // [name][google.cloud.dialogflow.v2.VoiceSelectionParams.name]. Note that this is only a preference, not requirement. If a - // voice of the appropriate gender is not available, the synthesizer should - // substitute a voice with a different gender rather than failing the request. - SsmlVoiceGender ssml_gender = 2; -} - // Configuration of how speech should be synthesized. message SynthesizeSpeechConfig { // Optional. Speaking rate/speed, in the range [0.25, 4.0]. 1.0 is the normal @@ -312,6 +312,24 @@ message SynthesizeSpeechConfig { VoiceSelectionParams voice = 4; } +// Instructs the speech synthesizer on how to generate the output audio content. +// If this audio config is supplied in a request, it overrides all existing +// text-to-speech settings applied to the agent. +message OutputAudioConfig { + // Required. Audio encoding of the synthesized audio content. + OutputAudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED]; + + // The synthesis sample rate (in hertz) for this audio. If not + // provided, then the synthesizer will use the default sample rate based on + // the audio encoding. If this is different from the voice's natural sample + // rate, then the synthesizer will honor this request by converting to the + // desired sample rate (which might result in worse audio quality). + int32 sample_rate_hertz = 2; + + // Configuration of how speech should be synthesized. + SynthesizeSpeechConfig synthesize_speech_config = 3; +} + // Audio encoding of the output audio format in Text-To-Speech. enum OutputAudioEncoding { // Not specified. @@ -330,21 +348,3 @@ enum OutputAudioEncoding { // than MP3 while using approximately the same bitrate. OUTPUT_AUDIO_ENCODING_OGG_OPUS = 3; } - -// Instructs the speech synthesizer on how to generate the output audio content. -// If this audio config is supplied in a request, it overrides all existing -// text-to-speech settings applied to the agent. -message OutputAudioConfig { - // Required. Audio encoding of the synthesized audio content. - OutputAudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED]; - - // The synthesis sample rate (in hertz) for this audio. If not - // provided, then the synthesizer will use the default sample rate based on - // the audio encoding. If this is different from the voice's natural sample - // rate, then the synthesizer will honor this request by converting to the - // desired sample rate (which might result in worse audio quality). - int32 sample_rate_hertz = 2; - - // Configuration of how speech should be synthesized. - SynthesizeSpeechConfig synthesize_speech_config = 3; -}