Synchronize new proto/yaml changes.
PiperOrigin-RevId: 265786496
This commit is contained in:
parent
d42eb8b611
commit
49f953989b
|
|
@ -3,11 +3,12 @@ common:
|
|||
api_version: v1
|
||||
organization_name: google-cloud
|
||||
proto_deps:
|
||||
- name: google-common-protos
|
||||
- name: google-common-protos
|
||||
src_proto_paths:
|
||||
- v1
|
||||
service_yaml: speech_v1.yaml
|
||||
gapic_yaml: v1/speech_gapic.yaml
|
||||
samples: v1/samples
|
||||
artifacts:
|
||||
- name: gapic_config
|
||||
type: GAPIC_CONFIG
|
||||
|
|
|
|||
|
|
@ -3,11 +3,12 @@ common:
|
|||
api_version: v1p1beta1
|
||||
organization_name: google-cloud
|
||||
proto_deps:
|
||||
- name: google-common-protos
|
||||
- name: google-common-protos
|
||||
src_proto_paths:
|
||||
- v1p1beta1
|
||||
service_yaml: speech_v1p1beta1.yaml
|
||||
gapic_yaml: v1p1beta1/speech_gapic.yaml
|
||||
samples: v1p1beta1/samples
|
||||
artifacts:
|
||||
- name: gapic_config
|
||||
type: GAPIC_CONFIG
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
# Canonical GCS paths to resource files used by samples and sample system tests
|
||||
sample_resources:
|
||||
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.mp3
|
||||
description: |
|
||||
44100 Hz, 2 channels, English, "How old is the Brooklyn Bridge?"
|
||||
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.raw
|
||||
description: |
|
||||
16000 Hz, 1 channel, English, "How old is the Brooklyn Bridge?"
|
||||
|
|
@ -8,7 +11,7 @@ sample_resources:
|
|||
44100 Hz, 1 channel, English, "How old is the Brooklyn Bridge?"
|
||||
- uri: gs://cloud-samples-data/speech/brooklyn_bridge.wav
|
||||
description: |
|
||||
16000 Hz, 2 channel (only first contains audio data), English, "How old is the Brooklyn Bridge?"
|
||||
16000 Hz, 2 channels (only first contains audio data), English, "How old is the Brooklyn Bridge?"
|
||||
- uri: gs://cloud-samples-data/speech/hello.raw
|
||||
description: |
|
||||
16000 Hz, 1 channel, English, "Hello"
|
||||
|
|
|
|||
|
|
@ -1,35 +1,36 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_async
|
||||
- region_tag: speech_transcribe_async
|
||||
title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
|
||||
description: Transcribe a long audio file using asynchronous speech recognition
|
||||
rpc: LongRunningRecognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.raw"
|
||||
- config.language_code = "en-US"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/brooklyn_bridge.raw"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.sample_rate_hertz
|
||||
value: 16000
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.encoding
|
||||
value: LINEAR16
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,35 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_async_gcs
|
||||
- region_tag: speech_transcribe_async_gcs
|
||||
title: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO)
|
||||
description: |
|
||||
Transcribe long audio file from Cloud Storage using asynchronous speech recognition
|
||||
description: Transcribe long audio file from Cloud Storage using asynchronous speech recognition
|
||||
rpc: LongRunningRecognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.language_code = "en-US"
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.sample_rate_hertz
|
||||
value: 16000
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.encoding
|
||||
value: LINEAR16
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,38 +1,48 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_async_word_time_offsets_gcs
|
||||
- region_tag: speech_transcribe_async_word_time_offsets_gcs
|
||||
title: Getting word timestamps (Cloud Storage) (LRO)
|
||||
description: |
|
||||
Print start and end time of each word spoken in audio file from Cloud Storage
|
||||
description: Print start and end time of each word spoken in audio file from Cloud Storage
|
||||
rpc: LongRunningRecognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
|
||||
- config.enable_word_time_offsets = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
|
||||
- parameter: config.enable_word_time_offsets
|
||||
description: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the start and end time offsets (timestamps) for those words.
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
- comment: ["The first result includes start and end time word offsets"]
|
||||
- define: result=$resp.results[0]
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.enable_word_time_offsets
|
||||
value: true
|
||||
comment: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the start and end time offsets (timestamps) for those words.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- comment:
|
||||
- The first result includes start and end time word offsets
|
||||
- define: result = $resp.results[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
- comment: ["Print the start and end time of each word"]
|
||||
- comment:
|
||||
- Print the start and end time of each word
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print: ["Word: %s", word.word]
|
||||
- print: ["Start time: %s seconds %s nanos", word.start_time.seconds, word.start_time.nanos]
|
||||
- print: ["End time: %s seconds %s nanos", word.end_time.seconds, word.end_time.nanos]
|
||||
- print:
|
||||
- "Word: %s"
|
||||
- word.word
|
||||
- print:
|
||||
- "Start time: %s seconds %s nanos"
|
||||
- word.start_time.seconds
|
||||
- word.start_time.nanos
|
||||
- print:
|
||||
- "End time: %s seconds %s nanos"
|
||||
- word.end_time.seconds
|
||||
- word.end_time.nanos
|
||||
|
|
|
|||
|
|
@ -1,38 +1,39 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_enhanced_model
|
||||
- region_tag: speech_transcribe_enhanced_model
|
||||
title: Using Enhanced Models (Local File)
|
||||
description: Transcribe a short audio file using an enhanced model
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.use_enhanced = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.model
|
||||
description: |
|
||||
The enhanced model to use, e.g. phone_call
|
||||
Currently phone_call is the only model available as an enhanced model.
|
||||
- parameter: config.use_enhanced
|
||||
description: |
|
||||
Use an enhanced model for speech recognition (when set to true).
|
||||
Project must be eligible for requesting enhanced models.
|
||||
Enhanced speech models require that you opt-in to data logging.
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/hello.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.model
|
||||
value: "phone_call"
|
||||
comment: |
|
||||
The enhanced model to use, e.g. phone_call
|
||||
Currently phone_call is the only model available as an enhanced model.
|
||||
- field: config.use_enhanced
|
||||
value: true
|
||||
comment: |
|
||||
Use an enhanced model for speech recognition (when set to true).
|
||||
Project must be eligible for requesting enhanced models.
|
||||
Enhanced speech models require that you opt-in to data logging.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,34 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_model_selection
|
||||
- region_tag: speech_transcribe_model_selection
|
||||
title: Selecting a Transcription Model (Local File)
|
||||
description: Transcribe a short audio file using a specified transcription model
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.model
|
||||
sample_argument_name: model
|
||||
description: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/hello.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.model
|
||||
value: "phone_call"
|
||||
input_parameter: model
|
||||
comment: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,34 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_model_selection_gcs
|
||||
- region_tag: speech_transcribe_model_selection_gcs
|
||||
title: Selecting a Transcription Model (Cloud Storage)
|
||||
description: |
|
||||
Transcribe a short audio file from Cloud Storage using a specified transcription model
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
|
||||
- parameter: config.model
|
||||
sample_argument_name: model
|
||||
description: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/hello.wav"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.model
|
||||
value: "phone_call"
|
||||
input_parameter: model
|
||||
comment: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,30 +1,30 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_multichannel
|
||||
- region_tag: speech_transcribe_multichannel
|
||||
title: Multi-Channel Audio Transcription (Local File)
|
||||
description: Transcribe a short audio file with multiple channels
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/multi.wav"
|
||||
- config.audio_channel_count = 2
|
||||
- config.enable_separate_recognition_per_channel = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.audio_channel_count
|
||||
description: The number of channels in the input audio file (optional)
|
||||
- parameter: config.enable_separate_recognition_per_channel
|
||||
description: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/multi.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.audio_channel_count
|
||||
value: 2
|
||||
comment: The number of channels in the input audio file (optional)
|
||||
- field: config.enable_separate_recognition_per_channel
|
||||
value: true
|
||||
comment: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
|
|
@ -32,9 +32,12 @@ samples:
|
|||
- comment:
|
||||
- "%s to recognize which audio channel this result is for"
|
||||
- channel_tag
|
||||
- print: ["Channel tag: %s", result.channel_tag]
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- "Channel tag: %s"
|
||||
- result.channel_tag
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,30 +1,29 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_multichannel_gcs
|
||||
- region_tag: speech_transcribe_multichannel_gcs
|
||||
title: Multi-Channel Audio Transcription (Cloud Storage)
|
||||
description: |
|
||||
Transcribe a short audio file from Cloud Storage with multiple channels
|
||||
description: Transcribe a short audio file from Cloud Storage with multiple channels
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/multi.wav"
|
||||
- config.audio_channel_count = 2
|
||||
- config.enable_separate_recognition_per_channel = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
|
||||
- parameter: config.audio_channel_count
|
||||
description: The number of channels in the input audio file (optional)
|
||||
- parameter: config.enable_separate_recognition_per_channel
|
||||
description: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/multi.wav"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.audio_channel_count
|
||||
value: 2
|
||||
comment: The number of channels in the input audio file (optional)
|
||||
- field: config.enable_separate_recognition_per_channel
|
||||
value: true
|
||||
comment: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
|
|
@ -32,9 +31,12 @@ samples:
|
|||
- comment:
|
||||
- "%s to recognize which audio channel this result is for"
|
||||
- channel_tag
|
||||
- print: ["Channel tag: %s", result.channel_tag]
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- "Channel tag: %s"
|
||||
- result.channel_tag
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,35 +1,36 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_sync
|
||||
- region_tag: speech_transcribe_sync
|
||||
title: Transcribe Audio File (Local File)
|
||||
description: Transcribe a short audio file using synchronous speech recognition
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.raw"
|
||||
- config.language_code = "en-US"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/brooklyn_bridge.raw"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.sample_rate_hertz
|
||||
value: 16000
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.encoding
|
||||
value: LINEAR16
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,35 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_sync_gcs
|
||||
- region_tag: speech_transcribe_sync_gcs
|
||||
title: Transcript Audio File (Cloud Storage)
|
||||
description: |
|
||||
Transcribe short audio file from Cloud Storage using synchronous speech recognition
|
||||
description: Transcribe short audio file from Cloud Storage using synchronous speech recognition
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.language_code = "en-US"
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: "URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]"
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.sample_rate_hertz
|
||||
value: 16000
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.encoding
|
||||
value: LINEAR16
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Transcribe Audio File using Long Running Operation (Local File) (LRO)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Getting word timestamps (Cloud Storage) (LRO)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Using Enhanced Models (Local File)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Selecting a Transcription Model (Local File)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Selecting a Transcription Model (Cloud Storage)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Multi-Channel Audio Transcription (Local File)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Multi-Channel Audio Transcription (Cloud Storage)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Transcribe Audio File (Local File)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Transcript Audio File (Cloud Storage)
|
||||
|
|
@ -62,285 +62,6 @@ interfaces:
|
|||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 1000000
|
||||
samples:
|
||||
standalone:
|
||||
- region_tag: speech_transcribe_sync_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_sync_gcs
|
||||
- region_tag: speech_transcribe_sync
|
||||
value_sets:
|
||||
- speech_transcribe_sync
|
||||
- region_tag: speech_transcribe_multichannel
|
||||
value_sets:
|
||||
- speech_transcribe_multichannel
|
||||
- region_tag: speech_transcribe_model_selection_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_model_selection_gcs
|
||||
- region_tag: speech_transcribe_async_word_time_offsets_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_async_word_time_offsets_gcs
|
||||
- region_tag: speech_transcribe_model_selection
|
||||
value_sets:
|
||||
- speech_transcribe_model_selection
|
||||
- region_tag: speech_transcribe_multichannel_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_multichannel_gcs
|
||||
- region_tag: speech_transcribe_enhanced_model
|
||||
value_sets:
|
||||
- speech_transcribe_enhanced_model
|
||||
sample_value_sets:
|
||||
- id: speech_transcribe_model_selection_gcs
|
||||
title: Selecting a Transcription Model (Cloud Storage)
|
||||
description: 'Transcribe a short audio file from Cloud Storage using a specified
|
||||
transcription model
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- parameter: config.model
|
||||
sample_argument_name: model
|
||||
description: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_sync_gcs
|
||||
title: Transcript Audio File (Cloud Storage)
|
||||
description: 'Transcribe short audio file from Cloud Storage using synchronous
|
||||
speech recognition
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.language_code = "en-US"
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_sync
|
||||
title: Transcribe Audio File (Local File)
|
||||
description: Transcribe a short audio file using synchronous speech recognition
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.raw"
|
||||
- config.language_code = "en-US"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_model_selection
|
||||
title: Selecting a Transcription Model (Local File)
|
||||
description: Transcribe a short audio file using a specified transcription model
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.model
|
||||
sample_argument_name: model
|
||||
description: |
|
||||
The transcription model to use, e.g. video, phone_call, default
|
||||
For a list of available transcription models, see:
|
||||
https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_multichannel_gcs
|
||||
title: Multi-Channel Audio Transcription (Cloud Storage)
|
||||
description: 'Transcribe a short audio file from Cloud Storage with multiple
|
||||
channels
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/multi.wav"
|
||||
- config.audio_channel_count = 2
|
||||
- config.enable_separate_recognition_per_channel = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- parameter: config.audio_channel_count
|
||||
description: The number of channels in the input audio file (optional)
|
||||
- parameter: config.enable_separate_recognition_per_channel
|
||||
description: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- "%s to recognize which audio channel this result is for"
|
||||
- channel_tag
|
||||
- print:
|
||||
- 'Channel tag: %s'
|
||||
- result.channel_tag
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_multichannel
|
||||
title: Multi-Channel Audio Transcription (Local File)
|
||||
description: Transcribe a short audio file with multiple channels
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/multi.wav"
|
||||
- config.audio_channel_count = 2
|
||||
- config.enable_separate_recognition_per_channel = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.audio_channel_count
|
||||
description: The number of channels in the input audio file (optional)
|
||||
- parameter: config.enable_separate_recognition_per_channel
|
||||
description: |
|
||||
When set to true, each audio channel will be recognized separately.
|
||||
The recognition result will contain a channel_tag field to state which
|
||||
channel that result belongs to
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- "%s to recognize which audio channel this result is for"
|
||||
- channel_tag
|
||||
- print:
|
||||
- 'Channel tag: %s'
|
||||
- result.channel_tag
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_enhanced_model
|
||||
title: Using Enhanced Models (Local File)
|
||||
description: Transcribe a short audio file using an enhanced model
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/hello.wav"
|
||||
- config.model = "phone_call"
|
||||
- config.use_enhanced = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.model
|
||||
description: |
|
||||
The enhanced model to use, e.g. phone_call
|
||||
Currently phone_call is the only model available as an enhanced model.
|
||||
- parameter: config.use_enhanced
|
||||
description: |
|
||||
Use an enhanced model for speech recognition (when set to true).
|
||||
Project must be eligible for requesting enhanced models.
|
||||
Enhanced speech models require that you opt-in to data logging.
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- name: LongRunningRecognize
|
||||
flattening:
|
||||
groups:
|
||||
|
|
@ -365,134 +86,6 @@ interfaces:
|
|||
poll_delay_multiplier: 1.5
|
||||
max_poll_delay_millis: 45000
|
||||
total_poll_timeout_millis: 86400000
|
||||
samples:
|
||||
standalone:
|
||||
- region_tag: speech_transcribe_async_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_async_gcs
|
||||
- region_tag: speech_transcribe_async
|
||||
value_sets:
|
||||
- speech_transcribe_async
|
||||
- region_tag: speech_transcribe_async_word_time_offsets_gcs
|
||||
value_sets:
|
||||
- speech_transcribe_async_word_time_offsets_gcs
|
||||
sample_value_sets:
|
||||
- id: speech_transcribe_async_gcs
|
||||
title: Transcript Audio File using Long Running Operation (Cloud Storage) (LRO)
|
||||
description: 'Transcribe long audio file from Cloud Storage using asynchronous
|
||||
speech recognition
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.raw"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.language_code = "en-US"
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_async
|
||||
title: Transcribe Audio File using Long Running Operation (Local File) (LRO)
|
||||
description: Transcribe a long audio file using asynchronous speech recognition
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.raw"
|
||||
- config.language_code = "en-US"
|
||||
- config.sample_rate_hertz = 16000
|
||||
- config.encoding = LINEAR16
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
- parameter: config.sample_rate_hertz
|
||||
description: Sample rate in Hertz of the audio data sent
|
||||
- parameter: config.encoding
|
||||
description: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_async_word_time_offsets_gcs
|
||||
title: Getting word timestamps (Cloud Storage) (LRO)
|
||||
description: 'Print start and end time of each word spoken in audio file from
|
||||
Cloud Storage
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.uri = "gs://cloud-samples-data/speech/brooklyn_bridge.flac"
|
||||
- config.enable_word_time_offsets = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: storage_uri
|
||||
description: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- parameter: config.enable_word_time_offsets
|
||||
description: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the start and end time offsets (timestamps) for those words.
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- comment:
|
||||
- The first result includes start and end time word offsets
|
||||
- define: result=$resp.results[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- comment:
|
||||
- Print the start and end time of each word
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print:
|
||||
- 'Word: %s'
|
||||
- word.word
|
||||
- print:
|
||||
- 'Start time: %s seconds %s nanos'
|
||||
- word.start_time.seconds
|
||||
- word.start_time.nanos
|
||||
- print:
|
||||
- 'End time: %s seconds %s nanos'
|
||||
- word.end_time.seconds
|
||||
- word.end_time.nanos
|
||||
- name: StreamingRecognize
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- region_tag: speech_adaptation_beta
|
||||
title: Speech Adaptation (Cloud Storage)
|
||||
description: Transcribe a short audio file with speech adaptation.
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.speech_contexts[0].phrases[0]
|
||||
value: "Brooklyn Bridge"
|
||||
input_parameter: phrase
|
||||
comment: |
|
||||
Phrase "hints" help recognize the specified phrases from your audio.
|
||||
- field: config.speech_contexts[0].boost
|
||||
value: 20.0
|
||||
comment: |
|
||||
Hint Boost. This value increases the probability that a specific
|
||||
phrase will be recognized over other similar sounding phrases.
|
||||
The higher the boost, the higher the chance of false positive
|
||||
recognition as well. Can accept wide range of positive values.
|
||||
Most use cases are best served with values between 0 and 20.
|
||||
Using a binary search approach may help you find the optimal value.
|
||||
- field: config.sample_rate_hertz
|
||||
value: 44100
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.encoding
|
||||
value: MP3
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- region_tag: speech_contexts_classes_beta
|
||||
title: Using Context Classes (Cloud Storage)
|
||||
description: Transcribe a short audio file with static context classes.
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/time.mp3"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.speech_contexts[0].phrases[0]
|
||||
value: "$TIME"
|
||||
input_parameter: phrase
|
||||
comment: |
|
||||
Phrase "hints" help recognize the specified phrases from your audio.
|
||||
In this sample we are using a static class phrase ($TIME).
|
||||
Classes represent groups of words that represent common concepts
|
||||
that occur in natural language.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.sample_rate_hertz
|
||||
value: 24000
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.encoding
|
||||
value: MP3
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- region_tag: speech_quickstart_beta
|
||||
title: Quickstart Beta
|
||||
description: Performs synchronous speech recognition on an audio file
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
request:
|
||||
- field: audio.uri
|
||||
value: "gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
|
||||
input_parameter: storage_uri
|
||||
comment: URI for audio file in Cloud Storage, e.g. gs://[BUCKET]/[FILE]
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
- field: config.sample_rate_hertz
|
||||
value: 44100
|
||||
comment: Sample rate in Hertz of the audio data sent
|
||||
- field: config.encoding
|
||||
value: MP3
|
||||
comment: |
|
||||
Encoding of audio data sent. This sample sets this explicitly.
|
||||
This field is optional for FLAC and WAV audio formats.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
@ -1,34 +1,35 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_auto_punctuation_beta
|
||||
- region_tag: speech_transcribe_auto_punctuation_beta
|
||||
title: Getting punctuation in results (Local File) (Beta)
|
||||
description: |
|
||||
Transcribe a short audio file with punctuation
|
||||
description: Transcribe a short audio file with punctuation
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.enable_automatic_punctuation = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.enable_automatic_punctuation
|
||||
description: |
|
||||
When enabled, transcription results may include punctuation (available for select languages).
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/commercial_mono.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.enable_automatic_punctuation
|
||||
value: true
|
||||
comment: |
|
||||
When enabled, transcription results may include punctuation
|
||||
(available for select languages).
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,46 +1,51 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
# TODO: this id should include "async" (calls LongRunningRecognize async rpc)
|
||||
- id: speech_transcribe_diarization_beta
|
||||
- region_tag: speech_transcribe_diarization_beta
|
||||
title: Separating different speakers (Local File) (LRO) (Beta)
|
||||
description: |
|
||||
Print confidence level for individual words in a transcription of a short audio file
|
||||
Separating different speakers in an audio file recording
|
||||
rpc: LongRunningRecognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.enable_speaker_diarization = True
|
||||
- config.diarization_speaker_count = 2
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.enable_speaker_diarization
|
||||
description: |
|
||||
If enabled, each word in the first alternative of each result will be
|
||||
tagged with a speaker tag to identify the speaker.
|
||||
- parameter: config.diarization_speaker_count
|
||||
description: |
|
||||
Optional. Specifies the estimated number of speakers in the conversation.
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/commercial_mono.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.enable_speaker_diarization
|
||||
value: true
|
||||
comment: |
|
||||
If enabled, each word in the first alternative of each result will be
|
||||
tagged with a speaker tag to identify the speaker.
|
||||
- field: config.diarization_speaker_count
|
||||
value: 2
|
||||
comment: Optional. Specifies the estimated number of speakers in the conversation.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- loop:
|
||||
collection: $resp.results
|
||||
variable: result
|
||||
body:
|
||||
- comment: ["First alternative has words tagged with speakers"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative has words tagged with speakers
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
- comment: ["Print the %s of each word", speaker_tag]
|
||||
- comment:
|
||||
- Print the %s of each word
|
||||
- speaker_tag
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print: ["Word: %s", word.word]
|
||||
- print: ["Speaker tag: %s", word.speaker_tag]
|
||||
- print:
|
||||
- 'Word: %s'
|
||||
- word.word
|
||||
- print:
|
||||
- 'Speaker tag: %s'
|
||||
- word.speaker_tag
|
||||
|
|
|
|||
|
|
@ -1,37 +1,44 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_multilanguage_beta
|
||||
- region_tag: speech_transcribe_multilanguage_beta
|
||||
title: Detecting language spoken automatically (Local File) (Beta)
|
||||
description: |
|
||||
Transcribe a short audio file with language detected from a list of possible languages
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.flac"
|
||||
- config.language_code = "fr"
|
||||
- config.alternative_language_codes[0] = "es"
|
||||
- config.alternative_language_codes[1] = "en"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.alternative_language_codes[0]
|
||||
description: |
|
||||
Specify up to 3 additional languages as possible alternative languages of the supplied audio.
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: resources/brooklyn_bridge.flac
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.language_code
|
||||
value: "fr"
|
||||
comment: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
- field: config.alternative_language_codes[0]
|
||||
value: "es"
|
||||
comment: |
|
||||
Specify up to 3 additional languages as possible alternative languages
|
||||
of the supplied audio.
|
||||
- field: config.alternative_language_codes[1]
|
||||
value: "en"
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["The %s which was detected as the most likely being spoken in the audio", language_code]
|
||||
- print: ["Detected language: %s", result.language_code]
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- The %s which was detected as the most likely being spoken in the audio
|
||||
- language_code
|
||||
- print:
|
||||
- "Detected language: %s"
|
||||
- result.language_code
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,42 +1,41 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_recognition_metadata_beta
|
||||
- region_tag: speech_transcribe_recognition_metadata_beta
|
||||
title: Adding recognition metadata (Local File) (Beta)
|
||||
description: |
|
||||
Adds additional details short audio file included in this recognition request
|
||||
description: Adds additional details about the short audio file included in this recognition request
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.metadata.interaction_type = VOICE_SEARCH
|
||||
- config.metadata.recording_device_type = SMARTPHONE
|
||||
- config.metadata.recording_device_name = "Pixel 3"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.metadata.interaction_type
|
||||
description: |
|
||||
The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al.
|
||||
- parameter: config.metadata.recording_device_type
|
||||
description: The kind of device used to capture the audio
|
||||
- parameter: config.metadata.recording_device_name
|
||||
description: |
|
||||
The device used to make the recording.
|
||||
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value.
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/commercial_mono.wav"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.metadata.interaction_type
|
||||
value: VOICE_SEARCH
|
||||
comment: The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION, et al.
|
||||
- field: config.metadata.recording_device_type
|
||||
value: SMARTPHONE
|
||||
comment: The kind of device used to capture the audio
|
||||
- field: config.metadata.recording_device_name
|
||||
value: "Pixel 3"
|
||||
comment: |
|
||||
The device used to make the recording.
|
||||
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
response:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: $resp.results
|
||||
body:
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
|
|
|
|||
|
|
@ -1,38 +1,45 @@
|
|||
type: com.google.api.codegen.samplegen.v1p2.SampleConfigProto
|
||||
schema_version: 1.2.0
|
||||
samples:
|
||||
- id: speech_transcribe_word_level_confidence_beta
|
||||
- region_tag: speech_transcribe_word_level_confidence_beta
|
||||
title: Enabling word-level confidence (Local File) (Beta)
|
||||
description: |
|
||||
Print confidence level for individual words in a transcription of a short audio file
|
||||
Print confidence level for individual words in a transcription of a short audio file.
|
||||
rpc: Recognize
|
||||
service: google.cloud.speech.v1p1beta1.Speech
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.flac"
|
||||
- config.enable_word_confidence = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: "Path to local audio file, e.g. /path/audio.wav"
|
||||
- parameter: config.enable_word_confidence
|
||||
description: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the confidence level for each of those words.
|
||||
- parameter: config.language_code
|
||||
description: "The language of the supplied audio"
|
||||
on_success:
|
||||
- comment: ["The first result includes confidence levels per word"]
|
||||
- define: result=$resp.results[0]
|
||||
- comment: ["First alternative is the most probable result"]
|
||||
- define: alternative=result.alternatives[0]
|
||||
request:
|
||||
- field: audio.content
|
||||
value: "resources/brooklyn_bridge.flac"
|
||||
input_parameter: local_file_path
|
||||
comment: Path to local audio file, e.g. /path/audio.wav
|
||||
value_is_file: true
|
||||
- field: config.enable_word_confidence
|
||||
value: true
|
||||
comment: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the confidence level for each of those words.
|
||||
- field: config.language_code
|
||||
value: "en-US"
|
||||
comment: The language of the supplied audio
|
||||
response:
|
||||
- comment:
|
||||
- The first result includes confidence levels per word
|
||||
- define: result = $resp.results[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative = result.alternatives[0]
|
||||
- print:
|
||||
- "Transcript: %s"
|
||||
- alternative.transcript
|
||||
- comment: ["Print the confidence level of each word"]
|
||||
- comment:
|
||||
- Print the confidence level of each word
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print: ["Word: %s", word.word]
|
||||
- print: ["Confidence: %s", word.confidence]
|
||||
- print:
|
||||
- "Word: %s"
|
||||
- word.word
|
||||
- print:
|
||||
- "Confidence: %s"
|
||||
- word.confidence
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Speech-to-Text Sample Tests For Speech Adaptation
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Speech-to-Text Sample Tests For Speech Contexts Static Classes
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Speech-to-Text Sample Tests For Quickstart
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Getting punctuation in results (Local File) (Beta)
|
||||
|
|
@ -12,7 +14,7 @@ test:
|
|||
# Simply assert that actual punctuation is present from commercial_mono.wav
|
||||
- literal: "?"
|
||||
- literal: ","
|
||||
- literal: "'"
|
||||
- literal: ""
|
||||
|
||||
# Confirm that another file can be transcribed (use another file)
|
||||
- name: speech_transcribe_auto_punctuation_beta (--local_file_path)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Separating different speakers (Local File) (LRO) (Beta)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Detecting language spoken automatically (Local File) (Beta)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Adding recognition metadata (Local File) (Beta)
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
type: test/samples
|
||||
schema_version: 1
|
||||
test:
|
||||
suites:
|
||||
- name: Enabling word-level confidence (Local File) (Beta)
|
||||
|
|
@ -60,289 +60,6 @@ interfaces:
|
|||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 1000000
|
||||
samples:
|
||||
standalone:
|
||||
- region_tag: speech_transcribe_word_level_confidence_beta
|
||||
value_sets:
|
||||
- speech_transcribe_word_level_confidence_beta
|
||||
- region_tag: speech_transcribe_multilanguage_beta
|
||||
value_sets:
|
||||
- speech_transcribe_multilanguage_beta
|
||||
- region_tag: speech_transcribe_recognition_metadata_beta
|
||||
value_sets:
|
||||
- speech_transcribe_recognition_metadata_beta
|
||||
- region_tag: speech_transcribe_auto_punctuation_beta
|
||||
value_sets:
|
||||
- speech_transcribe_auto_punctuation_beta
|
||||
- region_tag: speech_quickstart_beta
|
||||
value_sets:
|
||||
- speech_quickstart_beta
|
||||
- region_tag: speech_adaptation_beta
|
||||
value_sets:
|
||||
- speech_adaptation_beta
|
||||
- region_tag: speech_contexts_classes_beta
|
||||
value_sets:
|
||||
- speech_contexts_classes_beta
|
||||
sample_value_sets:
|
||||
- id: speech_transcribe_word_level_confidence_beta
|
||||
title: Enabling word-level confidence (Local File) (Beta)
|
||||
description: 'Print confidence level for individual words in a transcription
|
||||
of a short audio file
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.flac"
|
||||
- config.enable_word_confidence = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.enable_word_confidence
|
||||
description: |
|
||||
When enabled, the first result returned by the API will include a list
|
||||
of words and the confidence level for each of those words.
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- comment:
|
||||
- The first result includes confidence levels per word
|
||||
- define: result=$resp.results[0]
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- comment:
|
||||
- Print the confidence level of each word
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print:
|
||||
- 'Word: %s'
|
||||
- word.word
|
||||
- print:
|
||||
- 'Confidence: %s'
|
||||
- word.confidence
|
||||
- id: speech_transcribe_multilanguage_beta
|
||||
title: Detecting language spoken automatically (Local File) (Beta)
|
||||
description: 'Transcribe a short audio file with language detected from a list
|
||||
of possible languages
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/brooklyn_bridge.flac"
|
||||
- config.language_code = "fr"
|
||||
- config.alternative_language_codes[0] = "es"
|
||||
- config.alternative_language_codes[1] = "en"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.alternative_language_codes[0]
|
||||
description: 'Specify up to 3 additional languages as possible alternative
|
||||
languages of the supplied audio.
|
||||
|
||||
'
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- The %s which was detected as the most likely being spoken in the audio
|
||||
- language_code
|
||||
- print:
|
||||
- 'Detected language: %s'
|
||||
- result.language_code
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_auto_punctuation_beta
|
||||
title: Getting punctuation in results (Local File) (Beta)
|
||||
description: 'Transcribe a short audio file with punctuation
|
||||
|
||||
'
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.enable_automatic_punctuation = True
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.enable_automatic_punctuation
|
||||
description: 'When enabled, transcription results may include punctuation
|
||||
(available for select languages).
|
||||
|
||||
'
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_transcribe_recognition_metadata_beta
|
||||
title: Adding recognition metadata (Local File) (Beta)
|
||||
description: "Adds additional details about the short audio file included in this recognition
|
||||
request \n"
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.metadata.interaction_type = VOICE_SEARCH
|
||||
- config.metadata.recording_device_type = SMARTPHONE
|
||||
- config.metadata.recording_device_name = "Pixel 3"
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.metadata.interaction_type
|
||||
description: 'The use case of the audio, e.g. PHONE_CALL, DISCUSSION, PRESENTATION,
|
||||
et al.
|
||||
|
||||
'
|
||||
- parameter: config.metadata.recording_device_type
|
||||
description: The kind of device used to capture the audio
|
||||
- parameter: config.metadata.recording_device_name
|
||||
description: |
|
||||
The device used to make the recording.
|
||||
Arbitrary string, e.g. 'Pixel XL', 'VoIP', 'Cardioid Microphone', or other value.
|
||||
- parameter: config.language_code
|
||||
description: |
|
||||
The language of the supplied audio. Even though additional languages are
|
||||
provided by alternative_language_codes, a primary language is still required.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_quickstart_beta
|
||||
description: "Performs synchronous speech recognition on an audio file."
|
||||
parameters:
|
||||
defaults:
|
||||
- config.encoding=MP3
|
||||
- config.sample_rate_hertz=44100
|
||||
- config.language_code="en-US"
|
||||
- audio.uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
|
||||
attributes:
|
||||
- parameter: config.sample_rate_hertz
|
||||
sample_argument_name: sample_rate_hertz
|
||||
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
|
||||
- parameter: config.language_code
|
||||
sample_argument_name: language_code
|
||||
description: The language of the supplied audio.
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: uri_path
|
||||
description: Path to the audio file stored on GCS.
|
||||
on_success:
|
||||
- loop:
|
||||
collection: $resp.results
|
||||
variable: result
|
||||
body:
|
||||
- define: transcript=result.alternatives[0].transcript
|
||||
- print: ["Transcript: %s", transcript]
|
||||
- id: speech_adaptation_beta
|
||||
description: "Performs synchronous speech recognition with speech adaptation."
|
||||
parameters:
|
||||
defaults:
|
||||
- config.encoding=MP3
|
||||
- config.sample_rate_hertz=44100
|
||||
- config.language_code="en-US"
|
||||
- config.speech_contexts[0].phrases[0]="Brooklyn Bridge"
|
||||
- config.speech_contexts[0].boost=20
|
||||
- audio.uri="gs://cloud-samples-data/speech/brooklyn_bridge.mp3"
|
||||
attributes:
|
||||
- parameter: config.sample_rate_hertz
|
||||
sample_argument_name: sample_rate_hertz
|
||||
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
|
||||
- parameter: config.language_code
|
||||
sample_argument_name: language_code
|
||||
description: The language of the supplied audio.
|
||||
- parameter: config.speech_contexts[0].phrases[0]
|
||||
sample_argument_name: phrase
|
||||
description: Phrase "hints" help Speech-to-Text API recognize the specified phrases from your audio data.
|
||||
- parameter: config.speech_contexts[0].boost
|
||||
sample_argument_name: boost
|
||||
description: Positive value will increase the probability that a specific phrase will be recognized over other similar sounding phrases.
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: uri_path
|
||||
description: Path to the audio file stored on GCS.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- id: speech_contexts_classes_beta
|
||||
description: "Performs synchronous speech recognition with static context classes."
|
||||
parameters:
|
||||
defaults:
|
||||
- config.encoding=MP3
|
||||
- config.sample_rate_hertz=24000
|
||||
- config.language_code="en-US"
|
||||
- config.speech_contexts[0].phrases[0]="$TIME"
|
||||
- audio.uri="gs://cloud-samples-data/speech/time.mp3"
|
||||
attributes:
|
||||
- parameter: config.sample_rate_hertz
|
||||
sample_argument_name: sample_rate_hertz
|
||||
description: "Sample rate in Hertz of the audio data sent in all `RecognitionAudio` messages. Valid values are: 8000-48000."
|
||||
- parameter: config.language_code
|
||||
sample_argument_name: language_code
|
||||
description: The language of the supplied audio.
|
||||
- parameter: config.speech_contexts[0].phrases[0]
|
||||
sample_argument_name: phrase
|
||||
description: Phrase "hints" help Speech-to-Text API recognize the specified phrases from your audio data. In this sample we are using a static class phrase ($TIME). Classes represent groups of words that represent common concepts that occur in natural language. We recommend checking out the docs page for more info on static classes.
|
||||
- parameter: audio.uri
|
||||
sample_argument_name: uri_path
|
||||
description: Path to the audio file stored on GCS.
|
||||
on_success:
|
||||
- loop:
|
||||
variable: result
|
||||
collection: "$resp.results"
|
||||
body:
|
||||
- comment:
|
||||
- First alternative is the most probable result
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- name: LongRunningRecognize
|
||||
flattening:
|
||||
groups:
|
||||
|
|
@ -367,63 +84,6 @@ interfaces:
|
|||
poll_delay_multiplier: 1.5
|
||||
max_poll_delay_millis: 45000
|
||||
total_poll_timeout_millis: 86400000
|
||||
samples:
|
||||
standalone:
|
||||
- region_tag: speech_transcribe_diarization_beta
|
||||
value_sets:
|
||||
- speech_transcribe_diarization_beta
|
||||
sample_value_sets:
|
||||
- id: speech_transcribe_diarization_beta
|
||||
title: Separating different speakers (Local File) (LRO) (Beta)
|
||||
description: |
|
||||
Print confidence level for individual words in a transcription of a short audio file
|
||||
Separating different speakers in an audio file recording
|
||||
parameters:
|
||||
defaults:
|
||||
- audio.content = "resources/commercial_mono.wav"
|
||||
- config.enable_speaker_diarization = True
|
||||
- config.diarization_speaker_count = 2
|
||||
- config.language_code = "en-US"
|
||||
attributes:
|
||||
- parameter: audio.content
|
||||
sample_argument_name: local_file_path
|
||||
read_file: true
|
||||
description: Path to local audio file, e.g. /path/audio.wav
|
||||
- parameter: config.enable_speaker_diarization
|
||||
description: |
|
||||
If enabled, each word in the first alternative of each result will be
|
||||
tagged with a speaker tag to identify the speaker.
|
||||
- parameter: config.diarization_speaker_count
|
||||
description: 'Optional. Specifies the estimated number of speakers in the
|
||||
conversation.
|
||||
|
||||
'
|
||||
- parameter: config.language_code
|
||||
description: The language of the supplied audio
|
||||
on_success:
|
||||
- loop:
|
||||
collection: "$resp.results"
|
||||
variable: result
|
||||
body:
|
||||
- comment:
|
||||
- First alternative has words tagged with speakers
|
||||
- define: alternative=result.alternatives[0]
|
||||
- print:
|
||||
- 'Transcript: %s'
|
||||
- alternative.transcript
|
||||
- comment:
|
||||
- Print the %s of each word
|
||||
- speaker_tag
|
||||
- loop:
|
||||
collection: alternative.words
|
||||
variable: word
|
||||
body:
|
||||
- print:
|
||||
- 'Word: %s'
|
||||
- word.word
|
||||
- print:
|
||||
- 'Speaker tag: %s'
|
||||
- word.speaker_tag
|
||||
- name: StreamingRecognize
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
|
|
|
|||
Loading…
Reference in New Issue