From 09c6bd212586c0de4823f4bafa72b7989200a67f Mon Sep 17 00:00:00 2001
From: Google APIs <noreply@google.com>
Date: Mon, 21 Oct 2019 12:57:07 -0700
Subject: [PATCH] Synchronize new proto/yaml changes.

PiperOrigin-RevId: 275904143
---
 google/cloud/documentai/BUILD.bazel           |   1 +
 .../documentai/artman_documentai_v1beta1.yaml |  34 ++
 .../cloud/documentai/documentai_v1beta1.yaml  |  29 ++
 .../cloud/documentai/v1beta1/document.proto   | 446 ++++++++++++++++++
 .../v1beta1/document_understanding.proto      | 299 ++++++++++++
 .../documentai/v1beta1/documentai_gapic.yaml  | 114 +++++
 .../cloud/documentai/v1beta1/geometry.proto   |  55 +++
 7 files changed, 978 insertions(+)
 create mode 100644 google/cloud/documentai/BUILD.bazel
 create mode 100644 google/cloud/documentai/artman_documentai_v1beta1.yaml
 create mode 100644 google/cloud/documentai/documentai_v1beta1.yaml
 create mode 100644 google/cloud/documentai/v1beta1/document.proto
 create mode 100644 google/cloud/documentai/v1beta1/document_understanding.proto
 create mode 100644 google/cloud/documentai/v1beta1/documentai_gapic.yaml
 create mode 100644 google/cloud/documentai/v1beta1/geometry.proto

diff --git a/google/cloud/documentai/BUILD.bazel b/google/cloud/documentai/BUILD.bazel
new file mode 100644
index 00000000..a87c57fe
--- /dev/null
+++ b/google/cloud/documentai/BUILD.bazel
@@ -0,0 +1 @@
+exports_files(glob(["*.yaml"]))
diff --git a/google/cloud/documentai/artman_documentai_v1beta1.yaml b/google/cloud/documentai/artman_documentai_v1beta1.yaml
new file mode 100644
index 00000000..ae8ab02f
--- /dev/null
+++ b/google/cloud/documentai/artman_documentai_v1beta1.yaml
@@ -0,0 +1,34 @@
+common:
+  api_name: documentai
+  api_version: v1beta1
+  organization_name: google-cloud
+  proto_deps:
+  - name: google-common-protos
+  src_proto_paths:
+  - v1beta1
+  service_yaml: documentai_v1beta1.yaml
+  gapic_yaml: v1beta1/documentai_gapic.yaml
+artifacts:
+- name: gapic_config
+  type: GAPIC_CONFIG
+- name: java_gapic
+  type: GAPIC
+  language: JAVA
+- name: python_gapic
+  type: GAPIC
+  language: PYTHON
+- name: nodejs_gapic
+  type: GAPIC
+  language: NODEJS
+- name: php_gapic
+  type: GAPIC
+  language: PHP
+- name: go_gapic
+  type: GAPIC
+  language: GO
+- name: ruby_gapic
+  type: GAPIC
+  language: RUBY
+- name: csharp_gapic
+  type: GAPIC
+  language: CSHARP
diff --git a/google/cloud/documentai/documentai_v1beta1.yaml b/google/cloud/documentai/documentai_v1beta1.yaml
new file mode 100644
index 00000000..78865689
--- /dev/null
+++ b/google/cloud/documentai/documentai_v1beta1.yaml
@@ -0,0 +1,29 @@
+type: google.api.Service
+config_version: 3
+name: documentai.googleapis.com
+title: Cloud Document AI API
+
+apis:
+- name: google.cloud.documentai.v1beta1.DocumentUnderstandingService
+
+types:
+- name: google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse
+- name: google.cloud.documentai.v1beta1.Document
+- name: google.cloud.documentai.v1beta1.OperationMetadata
+
+documentation:
+  summary: |-
+    Service to parse structured information from unstructured or
+    semi-structured documents using state-of-the-art Google AI such as natural
+    language, computer vision, translation, and AutoML.
+
+authentication:
+  rules:
+  - selector: google.cloud.documentai.v1beta1.DocumentUnderstandingService.BatchProcessDocuments
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
+  - selector: 'google.longrunning.Operations.*'
+    oauth:
+      canonical_scopes: |-
+        https://www.googleapis.com/auth/cloud-platform
diff --git a/google/cloud/documentai/v1beta1/document.proto b/google/cloud/documentai/v1beta1/document.proto
new file mode 100644
index 00000000..1303c32d
--- /dev/null
+++ b/google/cloud/documentai/v1beta1/document.proto
@@ -0,0 +1,446 @@
+// Copyright 2019 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+syntax = "proto3";
+
+package google.cloud.documentai.v1beta1;
+
+import "google/api/annotations.proto";
+import "google/cloud/documentai/v1beta1/geometry.proto";
+import "google/rpc/status.proto";
+import "google/type/color.proto";
+
+option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai";
+option java_multiple_files = true;
+option java_outer_classname = "DocumentProto";
+option java_package = "com.google.cloud.documentai.v1beta1";
+
+// Document represents the canonical document resource in Document Understanding
+// AI.
+// It is an interchange format that provides insights into documents and allows
+// for collaboration between users and Document Understanding AI to iterate and
+// optimize for quality.
+message Document {
+  // For a large document, sharding may be performed to produce several
+  // document shards. Each document shard contains this field to detail which
+  // shard it is.
+  message ShardInfo {
+    // The 0-based index of this shard.
+    int64 shard_index = 1;
+
+    // Total number of shards.
+    int64 shard_count = 2;
+
+    // The index of the first character in
+    // [Document.text][google.cloud.documentai.v1beta1.Document.text] in the
+    // overall document global text.
+    int64 text_offset = 3;
+  }
+
+  // Annotation for common text style attributes. This adheres to CSS
+  // conventions as much as possible.
+  message Style {
+    // Font size with unit.
+    message FontSize {
+      // Font size for the text.
+      float size = 1;
+
+      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
+      string unit = 2;
+    }
+
+    // Text anchor indexing into the
+    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+    TextAnchor text_anchor = 1;
+
+    // Text color.
+    google.type.Color color = 2;
+
+    // Text background color.
+    google.type.Color background_color = 3;
+
+    // Font weight. Possible values are normal, bold, bolder, and lighter.
+    // https://www.w3schools.com/cssref/pr_font_weight.asp
+    string font_weight = 4;
+
+    // Text style. Possible values are normal, italic, and oblique.
+    // https://www.w3schools.com/cssref/pr_font_font-style.asp
+    string text_style = 5;
+
+    // Text decoration. Follows CSS standard.
+    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
+    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
+    string text_decoration = 6;
+
+    // Font size.
+    FontSize font_size = 7;
+  }
+
+  // A page in a [Document][google.cloud.documentai.v1beta1.Document].
+  message Page {
+    // Dimension for the page.
+    message Dimension {
+      // Page width.
+      float width = 1;
+
+      // Page height.
+      float height = 2;
+
+      // Dimension unit.
+      string unit = 3;
+    }
+
+    // Visual element describing a layout unit on a page.
+    message Layout {
+      // Detected human reading orientation.
+      enum Orientation {
+        // Unspecified orientation.
+        ORIENTATION_UNSPECIFIED = 0;
+
+        // Orientation is aligned with page up.
+        PAGE_UP = 1;
+
+        // Orientation is aligned with page right.
+        // Turn the head 90 degrees clockwise from upright to read.
+        PAGE_RIGHT = 2;
+
+        // Orientation is aligned with page down.
+        // Turn the head 180 degrees from upright to read.
+        PAGE_DOWN = 3;
+
+        // Orientation is aligned with page left.
+        // Turn the head 90 degrees counterclockwise from upright to read.
+        PAGE_LEFT = 4;
+      }
+
+      // Text anchor indexing into the
+      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+      TextAnchor text_anchor = 1;
+
+      // Confidence of the current
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] within
+      // context of the object this layout is for. e.g. confidence can be for a
+      // single token, a table, a visual element, etc. depending on context.
+      // Range [0, 1].
+      float confidence = 2;
+
+      // The bounding polygon for the
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
+      BoundingPoly bounding_poly = 3;
+
+      // Detected orientation for the
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout].
+      Orientation orientation = 4;
+    }
+
+    // A block has a set of lines (collected into paragraphs) that have a
+    // common line-spacing and orientation.
+    message Block {
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [Block][google.cloud.documentai.v1beta1.Document.Page.Block].
+      Layout layout = 1;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 2;
+    }
+
+    // A collection of lines that a human would perceive as a paragraph.
+    message Paragraph {
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [Paragraph][google.cloud.documentai.v1beta1.Document.Page.Paragraph].
+      Layout layout = 1;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 2;
+    }
+
+    // A collection of tokens that a human would perceive as a line.
+    // Does not cross column boundaries, can be horizontal, vertical, etc.
+    message Line {
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [Line][google.cloud.documentai.v1beta1.Document.Page.Line].
+      Layout layout = 1;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 2;
+    }
+
+    // A detected token.
+    message Token {
+      // Detected break at the end of a
+      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
+      message DetectedBreak {
+        // Enum to denote the type of break found.
+        enum Type {
+          // Unspecified break type.
+          TYPE_UNSPECIFIED = 0;
+
+          // A single whitespace.
+          SPACE = 1;
+
+          // A wider whitespace.
+          WIDE_SPACE = 2;
+
+          // A hyphen that indicates that a token has been split across lines.
+          HYPHEN = 3;
+        }
+
+        // Detected break type.
+        Type type = 1;
+      }
+
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
+      Layout layout = 1;
+
+      // Detected break at the end of a
+      // [Token][google.cloud.documentai.v1beta1.Document.Page.Token].
+      DetectedBreak detected_break = 2;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 3;
+    }
+
+    // Detected non-text visual elements e.g. checkbox, signature etc. on the
+    // page.
+    message VisualElement {
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
+      Layout layout = 1;
+
+      // Type of the
+      // [VisualElement][google.cloud.documentai.v1beta1.Document.Page.VisualElement].
+      string type = 2;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 3;
+    }
+
+    // A table representation similar to HTML table structure.
+    message Table {
+      // A row of table cells.
+      message TableRow {
+        // Cells that make up this row.
+        repeated TableCell cells = 1;
+      }
+
+      // A cell representation inside the table.
+      message TableCell {
+        // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+        // [TableCell][google.cloud.documentai.v1beta1.Document.Page.Table.TableCell].
+        Layout layout = 1;
+
+        // How many rows this cell spans.
+        int32 row_span = 2;
+
+        // How many columns this cell spans.
+        int32 col_span = 3;
+
+        // A list of detected languages together with confidence.
+        repeated DetectedLanguage detected_languages = 4;
+      }
+
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for
+      // [Table][google.cloud.documentai.v1beta1.Document.Page.Table].
+      Layout layout = 1;
+
+      // Header rows of the table.
+      repeated TableRow header_rows = 2;
+
+      // Body rows of the table.
+      repeated TableRow body_rows = 3;
+
+      // A list of detected languages together with confidence.
+      repeated DetectedLanguage detected_languages = 4;
+    }
+
+    // A form field detected on the page.
+    message FormField {
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
+      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
+      // name. e.g. `Address`, `Email`, `Grand total`, `Phone number`, etc.
+      Layout field_name = 1;
+
+      // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
+      // [FormField][google.cloud.documentai.v1beta1.Document.Page.FormField]
+      // value.
+      Layout field_value = 2;
+
+      // A list of detected languages for name together with confidence.
+      repeated DetectedLanguage name_detected_languages = 3;
+
+      // A list of detected languages for value together with confidence.
+      repeated DetectedLanguage value_detected_languages = 4;
+    }
+
+    // Detected language for a structural component.
+    message DetectedLanguage {
+      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
+      // information, see
+      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
+      string language_code = 1;
+
+      // Confidence of detected language. Range [0, 1].
+      float confidence = 2;
+    }
+
+    // 1-based index for current
+    // [Page][google.cloud.documentai.v1beta1.Document.Page] in a parent
+    // [Document][google.cloud.documentai.v1beta1.Document]. Useful when a page
+    // is taken out of a [Document][google.cloud.documentai.v1beta1.Document]
+    // for individual processing.
+    int32 page_number = 1;
+
+    // Physical dimension of the page.
+    Dimension dimension = 2;
+
+    // [Layout][google.cloud.documentai.v1beta1.Document.Page.Layout] for the
+    // page.
+    Layout layout = 3;
+
+    // A list of detected languages together with confidence.
+    repeated DetectedLanguage detected_languages = 4;
+
+    // A list of visually detected text blocks on the page.
+    // A block has a set of lines (collected into paragraphs) that have a common
+    // line-spacing and orientation.
+    repeated Block blocks = 5;
+
+    // A list of visually detected text paragraphs on the page.
+    // A collection of lines that a human would perceive as a paragraph.
+    repeated Paragraph paragraphs = 6;
+
+    // A list of visually detected text lines on the page.
+    // A collection of tokens that a human would perceive as a line.
+    repeated Line lines = 7;
+
+    // A list of visually detected tokens on the page.
+    repeated Token tokens = 8;
+
+    // A list of detected non-text visual elements e.g. checkbox,
+    // signature etc. on the page.
+    repeated VisualElement visual_elements = 9;
+
+    // A list of visually detected tables on the page.
+    repeated Table tables = 10;
+
+    // A list of visually detected form fields on the page.
+    repeated FormField form_fields = 11;
+  }
+
+  // A phrase in the text that is a known entity type, such as a person, an
+  // organization, or location.
+  message Entity {
+    // Provenance of the entity.
+    // Text anchor indexing into the
+    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+    TextAnchor text_anchor = 1;
+
+    // Entity type from a schema e.g. `Address`.
+    string type = 2;
+
+    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
+    string mention_text = 3;
+
+    // Canonical mention name. This will be a unique value in the entity list
+    // for this document.
+    string mention_id = 4;
+  }
+
+  // Relationship between
+  // [Entities][google.cloud.documentai.v1beta1.Document.Entity].
+  message EntityRelation {
+    // Subject entity mention_id.
+    string subject_id = 1;
+
+    // Object entity mention_id.
+    string object_id = 2;
+
+    // Relationship description.
+    string relation = 3;
+  }
+
+  // Text reference indexing into the
+  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+  message TextAnchor {
+    // A text segment in the
+    // [Document.text][google.cloud.documentai.v1beta1.Document.text]. The
+    // indices may be out of bounds which indicates that the text extends into
+    // another document shard for large sharded documents. See
+    // [ShardInfo.text_offset][google.cloud.documentai.v1beta1.Document.ShardInfo.text_offset]
+    message TextSegment {
+      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
+      // start UTF-8 char index in the
+      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+      int64 start_index = 1;
+
+      // [TextSegment][google.cloud.documentai.v1beta1.Document.TextAnchor.TextSegment]
+      // half open end UTF-8 char index in the
+      // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+      int64 end_index = 2;
+    }
+
+    // The text segments from the
+    // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+    repeated TextSegment text_segments = 1;
+  }
+
+  // Original source document from the user.
+  oneof source {
+    // Currently supports Google Cloud Storage URI of the form
+    //    `gs://bucket_name/object_name`. Object versioning is not supported.
+    //    See [Google Cloud Storage Request
+    //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
+    //    info.
+    string uri = 1;
+
+    // Inline document content, represented as a stream of bytes.
+    // Note: As with all `bytes` fields, protobuffers use a pure binary
+    // representation, whereas JSON representations use base64.
+    bytes content = 2;
+  }
+
+  // An IANA published MIME type (also referred to as media type). For more
+  // information, see
+  // https://www.iana.org/assignments/media-types/media-types.xhtml.
+  string mime_type = 3;
+
+  // UTF-8 encoded text in reading order from the document.
+  string text = 4;
+
+  // Styles for the
+  // [Document.text][google.cloud.documentai.v1beta1.Document.text].
+  repeated Style text_styles = 5;
+
+  // Visual page layout for the
+  // [Document][google.cloud.documentai.v1beta1.Document].
+  repeated Page pages = 6;
+
+  // A list of entities detected on
+  // [Document.text][google.cloud.documentai.v1beta1.Document.text]. For
+  // document shards, entities in this list may cross shard boundaries.
+  repeated Entity entities = 7;
+
+  // Relationship among
+  // [Document.entities][google.cloud.documentai.v1beta1.Document.entities].
+  repeated EntityRelation entity_relations = 8;
+
+  // Information about the sharding if this document is a shard of a larger
+  // document. If the document is not sharded, this message is not specified.
+  ShardInfo shard_info = 9;
+
+  // Any error that occurred while processing this document.
+  google.rpc.Status error = 10;
+}
diff --git a/google/cloud/documentai/v1beta1/document_understanding.proto b/google/cloud/documentai/v1beta1/document_understanding.proto
new file mode 100644
index 00000000..4f8dfb72
--- /dev/null
+++ b/google/cloud/documentai/v1beta1/document_understanding.proto
@@ -0,0 +1,299 @@
+// Copyright 2019 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+syntax = "proto3";
+
+package google.cloud.documentai.v1beta1;
+
+import "google/api/annotations.proto";
+import "google/api/client.proto";
+import "google/api/field_behavior.proto";
+import "google/cloud/documentai/v1beta1/geometry.proto";
+import "google/longrunning/operations.proto";
+import "google/protobuf/timestamp.proto";
+
+option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai";
+option java_multiple_files = true;
+option java_outer_classname = "DocumentAiProto";
+option java_package = "com.google.cloud.documentai.v1beta1";
+
+// Service to parse structured information from unstructured or semi-structured
+// documents using state-of-the-art Google AI such as natural language,
+// computer vision, and translation.
+service DocumentUnderstandingService {
+  option (google.api.default_host) = "documentai.googleapis.com";
+  option (google.api.oauth_scopes) =
+      "https://www.googleapis.com/auth/cloud-platform";
+
+  // LRO endpoint to batch process many documents.
+  rpc BatchProcessDocuments(BatchProcessDocumentsRequest)
+      returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1beta1/{parent=projects/*/locations/*}/documents:batchProcess"
+      body: "*"
+      additional_bindings {
+        post: "/v1beta1/{parent=projects/*}/documents:batchProcess"
+        body: "*"
+      }
+    };
+    option (google.api.method_signature) = "requests";
+    option (google.longrunning.operation_info) = {
+      response_type: "BatchProcessDocumentsResponse"
+      metadata_type: "OperationMetadata"
+    };
+  }
+}
+
+// Request to batch process documents as an asynchronous operation.
+message BatchProcessDocumentsRequest {
+  // Required. Individual requests for each document.
+  repeated ProcessDocumentRequest requests = 1
+      [(google.api.field_behavior) = REQUIRED];
+
+  // Target project and location to make a call.
+  //
+  // Format: `projects/{project-id}/locations/{location-id}`.
+  //
+  // If no location is specified, a region will be chosen automatically.
+  string parent = 2;
+}
+
+// Request to process one document.
+message ProcessDocumentRequest {
+  // Required. Information about the input file.
+  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Required. The desired output location.
+  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];
+
+  // Specifies a known document type for deeper structure detection. Valid
+  // values are currently "general" and "invoice". If not provided, "general"
+  // is used as default. If any other value is given, the request is rejected.
+  string document_type = 3;
+
+  // Controls table extraction behavior. If not specified, the system will
+  // decide reasonable defaults.
+  TableExtractionParams table_extraction_params = 4;
+
+  // Controls form extraction behavior. If not specified, the system will
+  // decide reasonable defaults.
+  FormExtractionParams form_extraction_params = 5;
+
+  // Controls entity extraction behavior. If not specified, the system will
+  // decide reasonable defaults.
+  EntityExtractionParams entity_extraction_params = 6;
+
+  // Controls OCR behavior. If not specified, the system will decide reasonable
+  // defaults.
+  OcrParams ocr_params = 7;
+}
+
+// Response to a batch document processing request. This is returned in
+// the LRO Operation after the operation is complete.
+message BatchProcessDocumentsResponse {
+  // Responses for each individual document.
+  repeated ProcessDocumentResponse responses = 1;
+}
+
+// Response to a single document processing request.
+message ProcessDocumentResponse {
+  // Information about the input file. This is the same as the corresponding
+  // input config in the request.
+  InputConfig input_config = 1;
+
+  // The output location of the parsed responses. The responses are written to
+  // this location as JSON-serialized `Document` objects.
+  OutputConfig output_config = 2;
+}
+
+// Parameters to control Optical Character Recognition (OCR) behavior.
+message OcrParams {
+  // List of languages to use for OCR. In most cases, an empty value
+  // yields the best results since it enables automatic language detection. For
+  // languages based on the Latin alphabet, setting `language_hints` is not
+  // needed. In rare cases, when the language of the text in the image is known,
+  // setting a hint will help get better results (although it will be a
+  // significant hindrance if the hint is wrong). Document processing returns an
+  // error if one or more of the specified languages is not one of the
+  // supported languages.
+  repeated string language_hints = 1;
+}
+
+// Parameters to control table extraction behavior.
+message TableExtractionParams {
+  // Whether to enable table extraction.
+  bool enabled = 1;
+
+  // Optional. Table bounding box hints that can be provided for complex cases
+  // in which our algorithm cannot locate the table(s).
+  repeated TableBoundHint table_bound_hints = 2
+      [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Table header hints. The extraction will bias towards producing
+  // these terms as table headers, which may improve accuracy.
+  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];
+
+  // Model version of the table extraction system. Default is "builtin/stable".
+  // Specify "builtin/latest" for the latest model.
+  string model_version = 4;
+}
+
+// A hint for a table bounding box on the page for table parsing.
+message TableBoundHint {
+  // Optional. Page number for multi-paged inputs this hint applies to. If not
+  // provided, this hint will apply to all pages by default. This value is
+  // 1-based.
+  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];
+
+  // Bounding box hint for a table on this page. The coordinates must be
+  // normalized to [0,1] and the bounding box must be an axis-aligned rectangle.
+  BoundingPoly bounding_box = 2;
+}
+
+// Parameters to control form extraction behavior.
+message FormExtractionParams {
+  // Whether to enable form extraction.
+  bool enabled = 1;
+
+  // User can provide pairs of (key text, value type) to improve the parsing
+  // result.
+  //
+  // For example, if a document has a field called "Date" that holds a date
+  // value and a field called "Amount" that may hold either a currency value
+  // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the
+  // following hints: [ {"key": "Date", "value_types": [ "DATE"]}, {"key":
+  // "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
+  //
+  // If the value type is unknown, but you want to provide hints for the keys,
+  // you can leave the value_types field blank. e.g. {"key": "Date",
+  // "value_types": []}
+  repeated KeyValuePairHint key_value_pair_hints = 2;
+
+  // Model version of the form extraction system. Default is
+  // "builtin/stable". Specify "builtin/latest" for the latest model.
+  string model_version = 3;
+}
+
+// User-provided hint for key value pair.
+message KeyValuePairHint {
+  // The key text for the hint.
+  string key = 1;
+
+  // Type of the value. This is case-insensitive, and could be one of:
+  // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
+  // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
+  // be ignored.
+  repeated string value_types = 2;
+}
+
+// Parameters to control entity extraction behavior.
+message EntityExtractionParams {
+  // Whether to enable entity extraction.
+  bool enabled = 1;
+
+  // Model version of the entity extraction. Default is
+  // "builtin/stable". Specify "builtin/latest" for the latest model.
+  string model_version = 2;
+}
+
+// The desired input location and metadata.
+message InputConfig {
+  // Required.
+  oneof source {
+    // The Google Cloud Storage location to read the input from. This must be a
+    // single file.
+    GcsSource gcs_source = 1;
+  }
+
+  // Required. Mimetype of the input. Currently supported mimetypes are
+  // application/pdf, image/tiff, and image/gif.
+  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
+}
+
+// The desired output location and metadata.
+message OutputConfig {
+  // Required.
+  oneof destination {
+    // The Google Cloud Storage location to write the output to.
+    GcsDestination gcs_destination = 1;
+  }
+
+  // The max number of pages to include into each output Document shard JSON on
+  // Google Cloud Storage.
+  //
+  // The valid range is [1, 100]. If not specified, the default value is 20.
+  //
+  // For example, for one pdf file with 100 pages, 100 parsed pages will be
+  // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
+  // containing 20 parsed pages will be written under the prefix
+  // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where
+  // x and y are 1-indexed page numbers.
+  //
+  // Example GCS outputs with 157 pages and pages_per_shard = 50:
+  //
+  // <prefix>pages-001-to-050.json
+  // <prefix>pages-051-to-100.json
+  // <prefix>pages-101-to-150.json
+  // <prefix>pages-151-to-157.json
+  int32 pages_per_shard = 2;
+}
+
+// The Google Cloud Storage location where the input file will be read from.
+message GcsSource {
+  string uri = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// The Google Cloud Storage location where the output file will be written to.
+message GcsDestination {
+  string uri = 1 [(google.api.field_behavior) = REQUIRED];
+}
+
+// Contains metadata for the BatchProcessDocuments operation.
+message OperationMetadata {
+  enum State {
+    // The default value. This value is used if the state is omitted.
+    STATE_UNSPECIFIED = 0;
+
+    // Request is received.
+    ACCEPTED = 1;
+
+    // Request operation is waiting for scheduling.
+    WAITING = 2;
+
+    // Request is being processed.
+    RUNNING = 3;
+
+    // The batch processing completed successfully.
+    SUCCEEDED = 4;
+
+    // The batch processing was cancelled.
+    CANCELLED = 5;
+
+    // The batch processing has failed.
+    FAILED = 6;
+  }
+
+  // The state of the current batch processing.
+  State state = 1;
+
+  // A message providing more details about the current state of processing.
+  string state_message = 2;
+
+  // The creation time of the operation.
+  google.protobuf.Timestamp create_time = 3;
+
+  // The last update time of the operation.
+  google.protobuf.Timestamp update_time = 4;
+}
diff --git a/google/cloud/documentai/v1beta1/documentai_gapic.yaml b/google/cloud/documentai/v1beta1/documentai_gapic.yaml
new file mode 100644
index 00000000..b9c5b8f8
--- /dev/null
+++ b/google/cloud/documentai/v1beta1/documentai_gapic.yaml
@@ -0,0 +1,114 @@
+type: com.google.api.codegen.ConfigProto
+config_schema_version: 1.0.0
+# Per-language settings for the generated code: one package name per language.
+language_settings:
+  java:
+    package_name: com.google.cloud.documentai.v1beta1
+  python:
+    package_name: google.cloud.documentai_v1beta1.gapic
+  go:
+    package_name: cloud.google.com/go/documentai/apiv1beta1
+  csharp:
+    package_name: Google.Cloud.DocumentAi.V1beta1
+  ruby:
+    package_name: Google::Cloud::DocumentAi::V1beta1
+  php:
+    package_name: Google\Cloud\DocumentAi\V1beta1
+  nodejs:
+    package_name: documentai.v1beta1
+# A list of API interface configurations.
+interfaces:
+# The fully qualified name of the API interface.
+- name: google.cloud.documentai.v1beta1.DocumentUnderstandingService
+  # A list of resource collection configurations.
+  # Consists of a name_pattern and an entity_name.
+  # The name_pattern is a pattern to describe the names of the resources of this
+  # collection, using the platform's conventions for URI patterns. A generator
+  # may use this to generate methods to compose and decompose such names. The
+  # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`;
+  # those will be taken as hints for the parameter names of the generated
+  # methods. If empty, no name methods are generated.
+  # The entity_name is the name to be used as a basis for generated methods and
+  # classes.
+  collections: []
+  # Definition for retryable codes.
+  retry_codes_def:
+  - name: idempotent
+    retry_codes:
+    - DEADLINE_EXCEEDED
+    - UNAVAILABLE
+  - name: non_idempotent
+    retry_codes: []  # empty list: no retryable codes
+  # Definition for retry/backoff parameters.
+  retry_params_def:
+  - name: default
+    initial_retry_delay_millis: 100  # 0.1 s
+    retry_delay_multiplier: 1.3
+    max_retry_delay_millis: 60000  # 60 s
+    initial_rpc_timeout_millis: 20000  # 20 s
+    rpc_timeout_multiplier: 1  # initial == max, so the RPC timeout stays 20 s
+    max_rpc_timeout_millis: 20000  # 20 s
+    total_timeout_millis: 600000  # 10 min
+  # A list of method configurations.
+  # Common properties:
+  #
+  #   name - The simple name of the method.
+  #
+  #   flattening - Specifies the configuration for parameter flattening.
+  #   Describes the parameter groups for which a generator should produce method
+  #   overloads which allow a client to directly pass request message fields as
+  #   method parameters. This information may or may not be used, depending on
+  #   the target language.
+  #   Consists of groups, which each represent a list of parameters to be
+  #   flattened. Each parameter listed must be a field of the request message.
+  #
+  #   required_fields - Fields that are always required for a request to be
+  #   valid.
+  #
+  #   page_streaming - Specifies the configuration for paging.
+  #   Describes information for generating a method which transforms a paging
+  #   list RPC into a stream of resources.
+  #   Consists of a request and a response.
+  #   The request specifies request information of the list method. It defines
+  #   which fields match the paging pattern in the request. The request consists
+  #   of a page_size_field and a token_field. The page_size_field is the name of
+  #   the optional field specifying the maximum number of elements to be
+  #   returned in the response. The token_field is the name of the field in the
+  #   request containing the page token.
+  #   The response specifies response information of the list method. It defines
+  #   which fields match the paging pattern in the response. The response
+  #   consists of a token_field and a resources_field. The token_field is the
+  #   name of the field in the response containing the next page token. The
+  #   resources_field is the name of the field in the response containing the
+  #   list of resources belonging to the page.
+  #
+  #   retry_codes_name - Specifies the configuration for retryable codes. The
+  #   name must be defined in interfaces.retry_codes_def.
+  #
+  #   retry_params_name - Specifies the configuration for retry/backoff
+  #   parameters. The name must be defined in interfaces.retry_params_def.
+  #
+  #   field_name_patterns - Maps the field name of the request type to
+  #   entity_name of interfaces.collections.
+  #   Specifies the string pattern that the field must follow.
+  #
+  #   timeout_millis - Specifies the default timeout for a non-retrying call. If
+  #   the call is retrying, refer to retry_params_name instead.
+  methods:
+  - name: BatchProcessDocuments
+    flattening:
+      groups:
+      - parameters:
+        - requests
+    required_fields:
+    - requests
+    retry_codes_name: idempotent  # retryable: DEADLINE_EXCEEDED, UNAVAILABLE
+    retry_params_name: default  # defined in retry_params_def
+    long_running:
+      return_type: google.cloud.documentai.v1beta1.BatchProcessDocumentsResponse
+      metadata_type: google.cloud.documentai.v1beta1.OperationMetadata
+      initial_poll_delay_millis: 20000  # 20 s
+      poll_delay_multiplier: 1.5
+      max_poll_delay_millis: 45000  # 45 s
+      total_poll_timeout_millis: 86400000  # 24 h
+    timeout_millis: 60000  # 60 s
diff --git a/google/cloud/documentai/v1beta1/geometry.proto b/google/cloud/documentai/v1beta1/geometry.proto
new file mode 100644
index 00000000..9dbe2b78
--- /dev/null
+++ b/google/cloud/documentai/v1beta1/geometry.proto
@@ -0,0 +1,55 @@
+// Copyright 2019 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+syntax = "proto3";
+
+package google.cloud.documentai.v1beta1;
+
+import "google/api/annotations.proto";
+
+option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta1;documentai";
+option java_multiple_files = true;
+option java_outer_classname = "GeometryProto";
+option java_package = "com.google.cloud.documentai.v1beta1";
+
+// A vertex represents a 2D point in the image.
+// NOTE: the vertex coordinates are in the same scale as the original image.
+message Vertex {
+  // X coordinate, in the same scale as the original image.
+  int32 x = 1;
+
+  // Y coordinate, in the same scale as the original image.
+  int32 y = 2;
+}
+
+// A normalized vertex represents a 2D point in the image.
+// NOTE: the normalized vertex coordinates are relative to the original image
+// and range from 0 to 1.
+message NormalizedVertex {
+  // X coordinate, relative to the original image; ranges from 0 to 1.
+  float x = 1;
+
+  // Y coordinate, relative to the original image; ranges from 0 to 1.
+  float y = 2;
+}
+
+// A bounding polygon for the detected image annotation.
+message BoundingPoly {
+  // The bounding polygon vertices, as `Vertex` points (original image scale).
+  repeated Vertex vertices = 1;
+
+  // The bounding polygon vertices, as `NormalizedVertex` points (0 to 1).
+  repeated NormalizedVertex normalized_vertices = 2;
+}