diff --git a/google/cloud/speech/speech_v1.yaml b/google/cloud/speech/speech_v1.yaml
index 1ffe2072..e0dd453a 100644
--- a/google/cloud/speech/speech_v1.yaml
+++ b/google/cloud/speech/speech_v1.yaml
@@ -1,35 +1,44 @@
-# Google Cloud Speech API service configuration
-
 type: google.api.Service
 config_version: 3
 name: speech.googleapis.com
-
-title: Google Cloud Speech API
-
-documentation:
-  summary:
-    Google Cloud Speech API.
+title: Cloud Speech API
 
 apis:
 - name: google.cloud.speech.v1.Speech
 
-authentication:
+documentation:
+  summary: Converts audio to text by applying powerful neural network models.
+  overview: |-
+    # Introduction
+
+    Google Cloud Speech API provides speech recognition as a service.
+
+backend:
   rules:
-  - selector: '*'
-    oauth:
-      canonical_scopes: https://www.googleapis.com/auth/cloud-platform
+  - selector: google.longrunning.Operations.GetOperation
+    deadline: 200.0
+  - selector: google.longrunning.Operations.WaitOperation
+    deadline: 200.0
+  - selector: google.cloud.speech.v1.Speech.Recognize
+    deadline: 200.0
+  - selector: google.cloud.speech.v1.Speech.LongRunningRecognize
+    deadline: 200.0
+  - selector: google.cloud.speech.v1.Speech.StreamingRecognize
+    deadline: 200.0
 
 http:
   rules:
-  - selector: google.longrunning.Operations.ListOperations
-    get: '/v1/operations'
-
   - selector: google.longrunning.Operations.GetOperation
     get: '/v1/operations/{name=*}'
+    additional_bindings:
+    - get: '/v1beta1/operations/{name=*}'
-
-  - selector: google.longrunning.Operations.DeleteOperation
-    delete: '/v1/operations/{name=*}'
+    - get: '/v1p1beta1/operations/{name=*}'
-
-  - selector: google.longrunning.Operations.CancelOperation
-    post: '/v1/operations/{name=*}:cancel'
-    body: '*'
+
+authentication:
+  rules:
+  - selector: '*'
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
diff --git a/google/cloud/speech/v1/cloud_speech.proto b/google/cloud/speech/v1/cloud_speech.proto
index b1bac7df..001d54b3 100644
--- a/google/cloud/speech/v1/cloud_speech.proto
+++ b/google/cloud/speech/v1/cloud_speech.proto
@@ -1,4 +1,4 @@
-// Copyright 2017 Google Inc.
+// Copyright 2018 Google LLC.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
+//
 
 syntax = "proto3";
 
@@ -20,6 +21,7 @@ import "google/api/annotations.proto";
 import "google/longrunning/operations.proto";
 import "google/protobuf/any.proto";
 import "google/protobuf/duration.proto";
+import "google/protobuf/empty.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";
 
@@ -35,7 +37,10 @@ service Speech {
   // Performs synchronous speech recognition: receive results after all audio
   // has been sent and processed.
   rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
-    option (google.api.http) = { post: "/v1/speech:recognize" body: "*" };
+    option (google.api.http) = {
+      post: "/v1/speech:recognize"
+      body: "*"
+    };
   }
 
   // Performs asynchronous speech recognition: receive results via the
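For reviewers, a usage sketch of the `Recognize` binding reshaped above. This is a minimal example assuming the `google-cloud-speech` Python client and application default credentials; the bucket/object URI is a placeholder:

```python
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
)
audio = speech.RecognitionAudio(uri="gs://bucket_name/object_name")

# Maps to POST /v1/speech:recognize; blocks until all audio is processed.
response = client.recognize(config=config, audio=audio)
for result in response.results:
    print(result.alternatives[0].transcript)
```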
@@ -43,12 +48,16 @@ service Speech {
   // `Operation.error` or an `Operation.response` which contains
   // a `LongRunningRecognizeResponse` message.
   rpc LongRunningRecognize(LongRunningRecognizeRequest) returns (google.longrunning.Operation) {
-    option (google.api.http) = { post: "/v1/speech:longrunningrecognize" body: "*" };
+    option (google.api.http) = {
+      post: "/v1/speech:longrunningrecognize"
+      body: "*"
+    };
   }
 
   // Performs bidirectional streaming speech recognition: receive results while
   // sending audio. This method is only available via the gRPC API (not REST).
-  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse);
+  rpc StreamingRecognize(stream StreamingRecognizeRequest) returns (stream StreamingRecognizeResponse) {
+  }
 }
 
 // The top-level message sent by the client for the `Recognize` method.
@@ -92,7 +101,7 @@ message StreamingRecognizeRequest {
     // `audio_content` data. The audio bytes must be encoded as specified in
     // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a
     // pure binary representation (not base64). See
-    // [audio limits](https://cloud.google.com/speech/limits#content).
+    // [content limits](/speech-to-text/quotas#content).
     bytes audio_content = 2;
   }
 }
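The `LongRunningRecognize` path returns a `google.longrunning.Operation`, which the client library polls through the `GetOperation` binding configured in the YAML above. A sketch, again assuming the Python client:

```python
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(language_code="en-US")  # FLAC header supplies the rest
audio = speech.RecognitionAudio(uri="gs://bucket_name/audio.flac")

# Returns an Operation; result() polls until done or the timeout expires
# (200 s here, mirroring the backend deadline set in the service config).
operation = client.long_running_recognize(config=config, audio=audio)
response = operation.result(timeout=200)
for result in response.results:
    print(result.alternatives[0].transcript)
```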
@@ -127,24 +136,34 @@
 // Provides information to the recognizer that specifies how to process the
 // request.
 message RecognitionConfig {
-  // Audio encoding of the data sent in the audio message. All encodings support
-  // only 1 channel (mono) audio. Only `FLAC` and `WAV` include a header that
-  // describes the bytes of audio that follow the header. The other encodings
-  // are raw audio bytes with no header.
+  // The encoding of the audio data sent in the request.
+  //
+  // All encodings support only 1 channel (mono) audio.
   //
   // For best results, the audio source should be captured and transmitted using
-  // a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be
-  // reduced if lossy codecs, which include the other codecs listed in
-  // this section, are used to capture or transmit the audio, particularly if
-  // background noise is present.
+  // a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech
+  // recognition can be reduced if lossy codecs are used to capture or transmit
+  // audio, particularly if background noise is present. Lossy codecs include
+  // `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, and `SPEEX_WITH_HEADER_BYTE`.
+  //
+  // The `FLAC` and `WAV` audio file formats include a header that describes the
+  // included audio content. You can request recognition for `WAV` files that
+  // contain either `LINEAR16` or `MULAW` encoded audio.
+  // If you send `FLAC` or `WAV` audio in your
+  // request, you do not need to specify an `AudioEncoding`; the audio
+  // encoding format is determined from the file header. If you specify
+  // an `AudioEncoding` when you send `FLAC` or `WAV` audio, the
+  // encoding configuration must match the encoding described in the audio
+  // header; otherwise the request returns an
+  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
   enum AudioEncoding {
-    // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT].
+    // Not specified.
     ENCODING_UNSPECIFIED = 0;
 
     // Uncompressed 16-bit signed little-endian samples (Linear PCM).
     LINEAR16 = 1;
 
-    // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio
+    // `FLAC` (Free Lossless Audio
     // Codec) is the recommended encoding because it is
     // lossless--therefore recognition is not compromised--and
     // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
@@ -163,7 +182,7 @@ message RecognitionConfig {
     // Opus encoded audio frames in Ogg container
     // ([OggOpus](https://wiki.xiph.org/OggOpus)).
-    // `sample_rate_hertz` must be 16000.
+    // `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
     OGG_OPUS = 6;
 
     // Although the use of lossy encodings is not recommended, if a very low
@@ -182,20 +201,24 @@ message RecognitionConfig {
     SPEEX_WITH_HEADER_BYTE = 7;
   }
 
-  // *Required* Encoding of audio data sent in all `RecognitionAudio` messages.
+  // Encoding of audio data sent in all `RecognitionAudio` messages.
+  // This field is optional for `FLAC` and `WAV` audio files and required
+  // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
   AudioEncoding encoding = 1;
 
-  // *Required* Sample rate in Hertz of the audio data sent in all
+  // Sample rate in Hertz of the audio data sent in all
   // `RecognitionAudio` messages. Valid values are: 8000-48000.
   // 16000 is optimal. For best results, set the sampling rate of the audio
   // source to 16000 Hz. If that's not possible, use the native sample rate of
   // the audio source (instead of re-sampling).
+  // This field is optional for `FLAC` and `WAV` audio files and required
+  // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
   int32 sample_rate_hertz = 2;
 
   // *Required* The language of the supplied audio as a
   // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
   // Example: "en-US".
-  // See [Language Support](https://cloud.google.com/speech/docs/languages)
+  // See [Language Support](/speech-to-text/docs/languages)
   // for a list of the currently supported language codes.
   string language_code = 3;
@@ -213,7 +236,9 @@ message RecognitionConfig {
   // won't be filtered out.
   bool profanity_filter = 5;
 
-  // *Optional* A means to provide context to assist the speech recognition.
+  // *Optional* array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
+  // A means to provide context to assist the speech recognition. For more
+  // information, see [Phrase Hints](/speech-to-text/docs/basics#phrase-hints).
   repeated SpeechContext speech_contexts = 6;
 
   // *Optional* If `true`, the top result includes a list of words and
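A sketch of how the `speech_contexts` field surfaces in the assumed Python client (the phrases are illustrative). Note that `encoding` and `sample_rate_hertz` are omitted because, per the updated comments above, a `FLAC` header already carries them:

```python
from google.cloud import speech_v1 as speech

# Bias recognition toward expected phrases via SpeechContext "hints".
config = speech.RecognitionConfig(
    language_code="en-US",
    speech_contexts=[speech.SpeechContext(phrases=["weather", "whether"])],
)
```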
@@ -221,6 +246,62 @@ message RecognitionConfig {
   // `false`, no word-level time offset information is returned. The default is
   // `false`.
   bool enable_word_time_offsets = 8;
+
+  // *Optional* If 'true', adds punctuation to recognition result hypotheses.
+  // This feature is only available in select languages. Setting this for
+  // requests in other languages has no effect at all.
+  // The default 'false' value does not add punctuation to result hypotheses.
+  // Note: This is currently offered as an experimental service, complimentary
+  // to all users. In the future this may be exclusively available as a
+  // premium feature.
+  bool enable_automatic_punctuation = 11;
+
+  // *Optional* Which model to select for the given request. Select the model
+  // best suited to your domain to get best results. If a model is not
+  // explicitly specified, then we auto-select a model based on the parameters
+  // in the RecognitionConfig.
+  // <table>
+  //   <tr>
+  //     <td><b>Model</b></td>
+  //     <td><b>Description</b></td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>command_and_search</code></td>
+  //     <td>Best for short queries such as voice commands or voice search.</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>phone_call</code></td>
+  //     <td>Best for audio that originated from a phone call (typically
+  //     recorded at an 8khz sampling rate).</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>video</code></td>
+  //     <td>Best for audio that originated from video or includes multiple
+  //         speakers. Ideally the audio is recorded at a 16khz or greater
+  //         sampling rate. This is a premium model that costs more than the
+  //         standard rate.</td>
+  //   </tr>
+  //   <tr>
+  //     <td><code>default</code></td>
+  //     <td>Best for audio that is not one of the specific audio models.
+  //         For example, long-form audio. Ideally the audio is high-fidelity,
+  //         recorded at a 16khz or greater sampling rate.</td>
+  //   </tr>
+  // </table>
+  string model = 13;
+
+  // *Optional* Set to true to use an enhanced model for speech recognition.
+  // You must also set the `model` field to a valid, enhanced model. If
+  // `use_enhanced` is set to true and the `model` field is not set, then
+  // `use_enhanced` is ignored. If `use_enhanced` is true and an enhanced
+  // version of the specified model does not exist, then the speech is
+  // recognized using the standard version of the specified model.
+  //
+  // Enhanced speech models require that you opt in to data logging using
+  // instructions in the [documentation](/speech-to-text/enable-data-logging).
+  // If you set `use_enhanced` to true and you have not enabled audio logging,
+  // then you will receive an error.
+  bool use_enhanced = 14;
 }
 
 // Provides "hints" to the speech recognizer to favor specific words and phrases
@@ -231,14 +312,14 @@ message SpeechContext {
   // to improve the accuracy for specific words and phrases, for example, if
   // specific commands are typically spoken by the user. This can also be used
   // to add additional words to the vocabulary of the recognizer. See
-  // [usage limits](https://cloud.google.com/speech/limits#content).
+  // [usage limits](/speech-to-text/quotas#content).
   repeated string phrases = 1;
 }
 
 // Contains audio data in the encoding specified in the `RecognitionConfig`.
 // Either `content` or `uri` must be supplied. Supplying both or neither
 // returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See
-// [audio limits](https://cloud.google.com/speech/limits#content).
+// [content limits](/speech-to-text/quotas#content).
 message RecognitionAudio {
   // The audio source, which is either inline content or a Google Cloud
   // Storage uri.
@@ -249,7 +330,8 @@ message RecognitionAudio {
     bytes content = 1;
 
     // URI that points to a file that contains audio data bytes as specified in
-    // `RecognitionConfig`. Currently, only Google Cloud Storage URIs are
+    // `RecognitionConfig`. The file must not be compressed (for example, gzip).
+    // Currently, only Google Cloud Storage URIs are
     // supported, which must be specified in the following format:
     // `gs://bucket_name/object_name` (other URI formats return
     // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
@@ -262,7 +344,7 @@ message RecognitionAudio {
 // contains the result as zero or more sequential `SpeechRecognitionResult`
 // messages.
 message RecognizeResponse {
-  // *Output-only* Sequential list of transcription results corresponding to
+  // Output only. Sequential list of transcription results corresponding to
   // sequential portions of audio.
   repeated SpeechRecognitionResult results = 2;
 }
@@ -273,7 +355,7 @@ message RecognizeResponse {
 // returned by the `GetOperation` call of the `google::longrunning::Operations`
 // service.
 message LongRunningRecognizeResponse {
-  // *Output-only* Sequential list of transcription results corresponding to
+  // Output only. Sequential list of transcription results corresponding to
   // sequential portions of audio.
   repeated SpeechRecognitionResult results = 2;
 }
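A sketch of the two new `RecognitionConfig` fields added above (`model` and `use_enhanced`), again via the assumed Python client:

```python
from google.cloud import speech_v1 as speech

# Request the premium "phone_call" model. Per the comments above,
# use_enhanced is ignored unless `model` is also set, and enhanced models
# require opting in to data logging.
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.MULAW,
    sample_rate_hertz=8000,
    language_code="en-US",
    model="phone_call",
    use_enhanced=True,
)
```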
@@ -358,35 +440,37 @@ message StreamingRecognizeResponse {
     END_OF_SINGLE_UTTERANCE = 1;
   }
 
-  // *Output-only* If set, returns a [google.rpc.Status][google.rpc.Status] message that
+  // Output only. If set, returns a [google.rpc.Status][google.rpc.Status] message that
   // specifies the error for the operation.
   google.rpc.Status error = 1;
 
-  // *Output-only* This repeated list contains zero or more results that
+  // Output only. This repeated list contains zero or more results that
   // correspond to consecutive portions of the audio currently being processed.
-  // It contains zero or more `is_final=false` results followed by zero or one
-  // `is_final=true` result (the newly settled portion).
+  // It contains zero or one `is_final=true` result (the newly settled portion),
+  // followed by zero or more `is_final=false` results (the interim results).
   repeated StreamingRecognitionResult results = 2;
 
-  // *Output-only* Indicates the type of speech event.
+  // Output only. Indicates the type of speech event.
   SpeechEventType speech_event_type = 4;
 }
 
 // A streaming speech recognition result corresponding to a portion of the audio
 // that is currently being processed.
 message StreamingRecognitionResult {
-  // *Output-only* May contain one or more recognition hypotheses (up to the
+  // Output only. May contain one or more recognition hypotheses (up to the
   // maximum specified in `max_alternatives`).
+  // These alternatives are ordered in terms of accuracy, with the top (first)
+  // alternative being the most probable, as ranked by the recognizer.
   repeated SpeechRecognitionAlternative alternatives = 1;
 
-  // *Output-only* If `false`, this `StreamingRecognitionResult` represents an
+  // Output only. If `false`, this `StreamingRecognitionResult` represents an
   // interim result that may change. If `true`, this is the final time the
   // speech service will return this particular `StreamingRecognitionResult`,
   // the recognizer will not return any further hypotheses for this portion of
   // the transcript and corresponding audio.
   bool is_final = 2;
 
-  // *Output-only* An estimate of the likelihood that the recognizer will not
+  // Output only. An estimate of the likelihood that the recognizer will not
   // change its guess about this interim result. Values range from 0.0
   // (completely unstable) to 1.0 (completely stable).
   // This field is only provided for interim results (`is_final=false`).
@@ -396,7 +480,7 @@ message StreamingRecognitionResult {
 
 // A speech recognition result corresponding to a portion of the audio.
 message SpeechRecognitionResult {
-  // *Output-only* May contain one or more recognition hypotheses (up to the
+  // Output only. May contain one or more recognition hypotheses (up to the
   // maximum specified in `max_alternatives`).
   // These alternatives are ordered in terms of accuracy, with the top (first)
   // alternative being the most probable, as ranked by the recognizer.
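The interim/final semantics documented above are easiest to see in client code. A streaming sketch (gRPC only), where `audio_chunks` is a placeholder for any iterable of raw audio byte strings:

```python
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()
streaming_config = speech.StreamingRecognitionConfig(
    config=speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    ),
    interim_results=True,
)

def requests(chunks):
    # The first request carries only the config; later ones carry audio bytes.
    yield speech.StreamingRecognizeRequest(streaming_config=streaming_config)
    for chunk in chunks:
        yield speech.StreamingRecognizeRequest(audio_content=chunk)

# audio_chunks is assumed to be defined elsewhere (e.g. microphone buffers).
for response in client.streaming_recognize(requests=requests(audio_chunks)):
    for result in response.results:
        tag = "final" if result.is_final else f"interim (stability={result.stability:.2f})"
        print(tag, result.alternatives[0].transcript)
```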
@@ -405,26 +489,25 @@ message SpeechRecognitionResult {
 
 // Alternative hypotheses (a.k.a. n-best list).
 message SpeechRecognitionAlternative {
-  // *Output-only* Transcript text representing the words that the user spoke.
+  // Output only. Transcript text representing the words that the user spoke.
   string transcript = 1;
 
-  // *Output-only* The confidence estimate between 0.0 and 1.0. A higher number
+  // Output only. The confidence estimate between 0.0 and 1.0. A higher number
   // indicates an estimated greater likelihood that the recognized words are
-  // correct. This field is typically provided only for the top hypothesis, and
-  // only for `is_final=true` results. Clients should not rely on the
-  // `confidence` field as it is not guaranteed to be accurate or consistent.
+  // correct. This field is set only for the top alternative of a non-streaming
+  // result or of a streaming result where `is_final=true`.
+  // This field is not guaranteed to be accurate and users should not rely on it
+  // to be always provided.
   // The default of 0.0 is a sentinel value indicating `confidence` was not set.
   float confidence = 2;
 
-  // *Output-only* A list of word-specific information for each recognized word.
+  // Output only. A list of word-specific information for each recognized word.
   repeated WordInfo words = 3;
 }
 
-// Word-specific information for recognized words. Word information is only
-// included in the response when certain request parameters are set, such
-// as `enable_word_time_offsets`.
+// Word-specific information for recognized words.
 message WordInfo {
-  // *Output-only* Time offset relative to the beginning of the audio,
+  // Output only. Time offset relative to the beginning of the audio,
   // and corresponding to the start of the spoken word.
   // This field is only set if `enable_word_time_offsets=true` and only
   // in the top hypothesis.
@@ -432,7 +515,7 @@ message WordInfo {
   // vary.
   google.protobuf.Duration start_time = 1;
 
-  // *Output-only* Time offset relative to the beginning of the audio,
+  // Output only. Time offset relative to the beginning of the audio,
   // and corresponding to the end of the spoken word.
   // This field is only set if `enable_word_time_offsets=true` and only
   // in the top hypothesis.
@@ -440,6 +523,6 @@ message WordInfo {
   // vary.
   google.protobuf.Duration end_time = 2;
 
-  // *Output-only* The word corresponding to this set of information.
+  // Output only. The word corresponding to this set of information.
   string word = 3;
 }
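Finally, a sketch showing how `enable_word_time_offsets` populates `WordInfo` in the top hypothesis; the printed values are `Duration`s relative to the start of the audio:

```python
from google.cloud import speech_v1 as speech

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    enable_word_time_offsets=True,
)
audio = speech.RecognitionAudio(uri="gs://bucket_name/object_name")

response = client.recognize(config=config, audio=audio)
for result in response.results:
    for info in result.alternatives[0].words:  # WordInfo, top hypothesis only
        print(f"{info.word}: {info.start_time} -> {info.end_time}")
```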