feat: publish documentai/v1beta2 protos

PiperOrigin-RevId: 300656808
This commit is contained in:
Google APIs 2020-03-12 17:14:44 -07:00 committed by Copybara-Service
parent 5202a9e0d9
commit c6fbac11af
7 changed files with 1462 additions and 0 deletions

View File

@ -0,0 +1,372 @@
# This file was automatically generated by BuildFileGenerator

# This is an API workspace, having public visibility by default makes perfect sense.
package(default_visibility = ["//visibility:public"])

##############################################################################
# Common
##############################################################################
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@com_google_googleapis_imports//:imports.bzl", "proto_library_with_info")

# Raw proto descriptors for the Document AI v1beta2 API surface.
proto_library(
    name = "documentai_proto",
    srcs = [
        "document.proto",
        "document_understanding.proto",
        "geometry.proto",
    ],
    deps = [
        "//google/api:annotations_proto",
        "//google/api:client_proto",
        "//google/api:field_behavior_proto",
        "//google/longrunning:operations_proto",
        "//google/rpc:status_proto",
        "//google/type:color_proto",
        "@com_google_protobuf//:timestamp_proto",
    ],
)

# Protos bundled together with service-config metadata; this is the single
# input consumed by each language-specific GAPIC generator rule below.
proto_library_with_info(
    name = "documentai_proto_with_info",
    deps = [
        ":documentai_proto",
        "//google/cloud:common_resources_proto",
    ],
)
##############################################################################
# Java
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "java_gapic_assembly_gradle_pkg",
    "java_gapic_library",
    "java_gapic_test",
    "java_grpc_library",
    "java_proto_library",
)

# Java message classes generated from the protos.
java_proto_library(
    name = "documentai_java_proto",
    deps = [":documentai_proto"],
)

# Java gRPC stubs layered on top of the message classes.
java_grpc_library(
    name = "documentai_java_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_java_proto"],
)

# Hand-written-style Java client (GAPIC) generated from the annotated protos.
java_gapic_library(
    name = "documentai_java_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    test_deps = [
        ":documentai_java_grpc",
    ],
    deps = [
        ":documentai_java_proto",
    ],
)

# NOTE(review): ":documentai_java_gapic_test" is presumably an implicit output
# of java_gapic_library above — confirm against the rule definition.
java_gapic_test(
    name = "documentai_java_gapic_test_suite",
    test_classes = [
        "com.google.cloud.documentai.v1beta2.DocumentUnderstandingServiceClientTest",
    ],
    runtime_deps = [":documentai_java_gapic_test"],
)

# Open Source Packages
java_gapic_assembly_gradle_pkg(
    name = "google-cloud-documentai-v1beta2-java",
    deps = [
        ":documentai_java_gapic",
        ":documentai_java_grpc",
        ":documentai_java_proto",
        ":documentai_proto",
    ],
)
##############################################################################
# Go
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "go_gapic_assembly_pkg",
    "go_gapic_library",
    "go_proto_library",
    "go_test",
)

# Go types and gRPC stubs generated from the protos.
go_proto_library(
    name = "documentai_go_proto",
    compilers = ["@io_bazel_rules_go//proto:go_grpc"],
    importpath = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2",
    protos = [":documentai_proto"],
    deps = [
        "//google/api:annotations_go_proto",
        "//google/longrunning:longrunning_go_proto",
        "//google/rpc:status_go_proto",
        "//google/type:color_go_proto",
    ],
)

# Go client (GAPIC) generated from the annotated protos.
go_gapic_library(
    name = "documentai_go_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    importpath = "cloud.google.com/go/documentai/apiv1beta2",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_go_proto",
        "//google/longrunning:longrunning_go_gapic",
        "//google/longrunning:longrunning_go_proto",
        "@com_google_cloud_go//longrunning:go_default_library",
    ],
)

# NOTE(review): ":documentai_go_gapic_srcjar_test" looks like an implicit
# output of go_gapic_library — confirm against the rule definition.
go_test(
    name = "documentai_go_gapic_test",
    srcs = [":documentai_go_gapic_srcjar_test"],
    embed = [":documentai_go_gapic"],
    importpath = "cloud.google.com/go/documentai/apiv1beta2",
)

# Open Source Packages
go_gapic_assembly_pkg(
    name = "gapi-cloud-documentai-v1beta2-go",
    deps = [
        ":documentai_go_gapic",
        ":documentai_go_gapic_srcjar-smoke-test.srcjar",
        ":documentai_go_gapic_srcjar-test.srcjar",
        ":documentai_go_proto",
    ],
)
##############################################################################
# Python
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "moved_proto_library",
    "py_gapic_assembly_pkg",
    "py_gapic_library",
    "py_grpc_library",
    "py_proto_library",
)

# Re-homed copy of the protos used by the Python generators; deps mirror
# the deps of ":documentai_proto" above.
moved_proto_library(
    name = "documentai_moved_proto",
    srcs = [":documentai_proto"],
    deps = [
        "//google/api:annotations_proto",
        "//google/api:client_proto",
        "//google/api:field_behavior_proto",
        "//google/longrunning:operations_proto",
        "//google/rpc:status_proto",
        "//google/type:color_proto",
        "@com_google_protobuf//:timestamp_proto",
    ],
)

# Python message classes, with reference docs via the docs plugin.
py_proto_library(
    name = "documentai_py_proto",
    plugin = "@protoc_docs_plugin//:docs_plugin",
    deps = [":documentai_moved_proto"],
)

# Python gRPC stubs.
py_grpc_library(
    name = "documentai_py_grpc",
    srcs = [":documentai_moved_proto"],
    deps = [":documentai_py_proto"],
)

# Python client (GAPIC) generated from the annotated protos.
py_gapic_library(
    name = "documentai_py_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_py_grpc",
        ":documentai_py_proto",
    ],
)

# Open Source Packages
py_gapic_assembly_pkg(
    name = "documentai-v1beta2-py",
    deps = [
        ":documentai_py_gapic",
        ":documentai_py_grpc",
        ":documentai_py_proto",
    ],
)
##############################################################################
# PHP
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "php_gapic_assembly_pkg",
    "php_gapic_library",
    "php_grpc_library",
    "php_proto_library",
)

# PHP message classes generated from the protos.
php_proto_library(
    name = "documentai_php_proto",
    deps = [":documentai_proto"],
)

# PHP gRPC stubs.
php_grpc_library(
    name = "documentai_php_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_php_proto"],
)

# PHP client (GAPIC) generated from the annotated protos.
php_gapic_library(
    name = "documentai_php_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_php_grpc",
        ":documentai_php_proto",
    ],
)

# Open Source Packages
php_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-php",
    deps = [
        ":documentai_php_gapic",
        ":documentai_php_grpc",
        ":documentai_php_proto",
    ],
)
##############################################################################
# Node.js
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "nodejs_gapic_assembly_pkg",
    "nodejs_gapic_library",
)

# Node.js client (GAPIC) generated from the annotated protos; Node.js needs
# no separate proto/grpc libraries here.
nodejs_gapic_library(
    name = "documentai_nodejs_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [],
)

nodejs_gapic_assembly_pkg(
    name = "documentai-v1beta2-nodejs",
    deps = [
        ":documentai_nodejs_gapic",
        ":documentai_proto",
    ],
)
##############################################################################
# Ruby
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "ruby_gapic_assembly_pkg",
    "ruby_gapic_library",
    "ruby_grpc_library",
    "ruby_proto_library",
)

# Ruby message classes generated from the protos.
ruby_proto_library(
    name = "documentai_ruby_proto",
    deps = [":documentai_proto"],
)

# Ruby gRPC stubs.
ruby_grpc_library(
    name = "documentai_ruby_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_ruby_proto"],
)

# Ruby client (GAPIC) generated from the annotated protos.
ruby_gapic_library(
    name = "documentai_ruby_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_ruby_grpc",
        ":documentai_ruby_proto",
    ],
)

# Open Source Packages
ruby_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-ruby",
    deps = [
        ":documentai_ruby_gapic",
        ":documentai_ruby_grpc",
        ":documentai_ruby_proto",
    ],
)
##############################################################################
# C#
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "csharp_gapic_assembly_pkg",
    "csharp_gapic_library",
    "csharp_grpc_library",
    "csharp_proto_library",
)

# C# message classes generated from the protos.
csharp_proto_library(
    name = "documentai_csharp_proto",
    deps = [":documentai_proto"],
)

# C# gRPC stubs.
csharp_grpc_library(
    name = "documentai_csharp_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_csharp_proto"],
)

# C# client (GAPIC) generated from the annotated protos.
csharp_gapic_library(
    name = "documentai_csharp_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_csharp_grpc",
        ":documentai_csharp_proto",
    ],
)

# Open Source Packages
csharp_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-csharp",
    deps = [
        ":documentai_csharp_gapic",
        ":documentai_csharp_grpc",
        ":documentai_csharp_proto",
    ],
)

##############################################################################
# C++
##############################################################################
# Put your C++ rules here

View File

@ -0,0 +1,516 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta2;
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/api/annotations.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.documentai.v1beta2";
// Document represents the canonical document resource in Document Understanding
// AI.
// It is an interchange format that provides insights into documents and allows
// for collaboration between users and Document Understanding AI to iterate and
// optimize for quality.
message Document {
  // For a large document, sharding may be performed to produce several
  // document shards. Each document shard contains this field to detail which
  // shard it is.
  message ShardInfo {
    // The 0-based index of this shard.
    int64 shard_index = 1;

    // Total number of shards.
    int64 shard_count = 2;

    // The index of the first character in [Document.text][google.cloud.documentai.v1beta2.Document.text] in the overall
    // document global text.
    int64 text_offset = 3;
  }

  // Label attaches schema information and/or other metadata to segments within
  // a [Document][google.cloud.documentai.v1beta2.Document]. Multiple [Label][google.cloud.documentai.v1beta2.Document.Label]s on a single field can denote either
  // different labels, different instances of the same label created at
  // different times, or some combination of both.
  message Label {
    // Provenance of the label.
    oneof source {
      // Label is generated by an AutoML model. This field stores the full
      // resource name of the AutoML model.
      //
      // Format:
      // `projects/{project-id}/locations/{location-id}/models/{model-id}`
      string automl_model = 2;
    }

    // Name of the label.
    //
    // When the label is generated from an AutoML Text Classification model,
    // this field represents the name of the category.
    string name = 1;

    // Confidence score between 0 and 1 for label assignment.
    float confidence = 3;
  }

  // Annotation for common text style attributes. This adheres to CSS
  // conventions as much as possible.
  message Style {
    // Font size with unit.
    message FontSize {
      // Font size for the text.
      float size = 1;

      // Unit for the font size. Follows CSS naming (in, px, pt, etc.).
      string unit = 2;
    }

    // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
    TextAnchor text_anchor = 1;

    // Text color.
    google.type.Color color = 2;

    // Text background color.
    google.type.Color background_color = 3;

    // Font weight. Possible values are normal, bold, bolder, and lighter.
    // https://www.w3schools.com/cssref/pr_font_weight.asp
    string font_weight = 4;

    // Text style. Possible values are normal, italic, and oblique.
    // https://www.w3schools.com/cssref/pr_font_font-style.asp
    string text_style = 5;

    // Text decoration. Follows CSS standard.
    // <text-decoration-line> <text-decoration-color> <text-decoration-style>
    // https://www.w3schools.com/cssref/pr_text_text-decoration.asp
    string text_decoration = 6;

    // Font size.
    FontSize font_size = 7;
  }

  // A page in a [Document][google.cloud.documentai.v1beta2.Document].
  message Page {
    // Dimension for the page.
    message Dimension {
      // Page width.
      float width = 1;

      // Page height.
      float height = 2;

      // Dimension unit.
      string unit = 3;
    }

    // Visual element describing a layout unit on a page.
    message Layout {
      // Detected human reading orientation.
      enum Orientation {
        // Unspecified orientation.
        ORIENTATION_UNSPECIFIED = 0;

        // Orientation is aligned with page up.
        PAGE_UP = 1;

        // Orientation is aligned with page right.
        // Turn the head 90 degrees clockwise from upright to read.
        PAGE_RIGHT = 2;

        // Orientation is aligned with page down.
        // Turn the head 180 degrees from upright to read.
        PAGE_DOWN = 3;

        // Orientation is aligned with page left.
        // Turn the head 90 degrees counterclockwise from upright to read.
        PAGE_LEFT = 4;
      }

      // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
      TextAnchor text_anchor = 1;

      // Confidence of the current [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] within context of the object this
      // layout is for. e.g. confidence can be for a single token, a table,
      // a visual element, etc. depending on context. Range [0, 1].
      float confidence = 2;

      // The bounding polygon for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
      BoundingPoly bounding_poly = 3;

      // Detected orientation for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
      Orientation orientation = 4;

      // Optional. This is the identifier used by referencing [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s.
      string id = 5 [(google.api.field_behavior) = OPTIONAL];
    }

    // A block has a set of lines (collected into paragraphs) that have a
    // common line-spacing and orientation.
    message Block {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Block][google.cloud.documentai.v1beta2.Document.Page.Block].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of lines that a human would perceive as a paragraph.
    message Paragraph {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A collection of tokens that a human would perceive as a line.
    // Does not cross column boundaries, can be horizontal, vertical, etc.
    message Line {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Line][google.cloud.documentai.v1beta2.Document.Page.Line].
      Layout layout = 1;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 2;
    }

    // A detected token.
    message Token {
      // Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      message DetectedBreak {
        // Enum to denote the type of break found.
        enum Type {
          // Unspecified break type.
          TYPE_UNSPECIFIED = 0;

          // A single whitespace.
          SPACE = 1;

          // A wider whitespace.
          WIDE_SPACE = 2;

          // A hyphen that indicates that a token has been split across lines.
          HYPHEN = 3;
        }

        // Detected break type.
        Type type = 1;
      }

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      Layout layout = 1;

      // Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
      DetectedBreak detected_break = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // Detected non-text visual elements e.g. checkbox, signature etc. on the
    // page.
    message VisualElement {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
      Layout layout = 1;

      // Type of the [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
      string type = 2;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 3;
    }

    // A table representation similar to HTML table structure.
    message Table {
      // A row of table cells.
      message TableRow {
        // Cells that make up this row.
        repeated TableCell cells = 1;
      }

      // A cell representation inside the table.
      message TableCell {
        // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell].
        Layout layout = 1;

        // How many rows this cell spans.
        int32 row_span = 2;

        // How many columns this cell spans.
        int32 col_span = 3;

        // A list of detected languages together with confidence.
        repeated DetectedLanguage detected_languages = 4;
      }

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Table][google.cloud.documentai.v1beta2.Document.Page.Table].
      Layout layout = 1;

      // Header rows of the table.
      repeated TableRow header_rows = 2;

      // Body rows of the table.
      repeated TableRow body_rows = 3;

      // A list of detected languages together with confidence.
      repeated DetectedLanguage detected_languages = 4;
    }

    // A form field detected on the page.
    message FormField {
      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] name. e.g. `Address`, `Email`,
      // `Grand total`, `Phone number`, etc.
      Layout field_name = 1;

      // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] value.
      Layout field_value = 2;

      // A list of detected languages for name together with confidence.
      repeated DetectedLanguage name_detected_languages = 3;

      // A list of detected languages for value together with confidence.
      repeated DetectedLanguage value_detected_languages = 4;

      // If the value is non-textual, this field represents the type. Current
      // valid values are:
      // - blank (this indicates the field_value is normal text)
      // - "unfilled_checkbox"
      // - "filled_checkbox"
      string value_type = 5;

      // An internal field, created for Labeling UI to export key text.
      string corrected_key_text = 6;

      // An internal field, created for Labeling UI to export value text.
      string corrected_value_text = 7;
    }

    // Detected language for a structural component.
    message DetectedLanguage {
      // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
      // information, see
      // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
      string language_code = 1;

      // Confidence of detected language. Range [0, 1].
      float confidence = 2;
    }

    // 1-based index for current [Page][google.cloud.documentai.v1beta2.Document.Page] in a parent [Document][google.cloud.documentai.v1beta2.Document].
    // Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta2.Document] for individual
    // processing.
    int32 page_number = 1;

    // Physical dimension of the page.
    Dimension dimension = 2;

    // [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the page.
    Layout layout = 3;

    // A list of detected languages together with confidence.
    repeated DetectedLanguage detected_languages = 4;

    // A list of visually detected text blocks on the page.
    // A block has a set of lines (collected into paragraphs) that have a common
    // line-spacing and orientation.
    repeated Block blocks = 5;

    // A list of visually detected text paragraphs on the page.
    // A collection of lines that a human would perceive as a paragraph.
    repeated Paragraph paragraphs = 6;

    // A list of visually detected text lines on the page.
    // A collection of tokens that a human would perceive as a line.
    repeated Line lines = 7;

    // A list of visually detected tokens on the page.
    repeated Token tokens = 8;

    // A list of detected non-text visual elements e.g. checkbox,
    // signature etc. on the page.
    repeated VisualElement visual_elements = 9;

    // A list of visually detected tables on the page.
    repeated Table tables = 10;

    // A list of visually detected form fields on the page.
    repeated FormField form_fields = 11;
  }

  // A phrase in the text that is a known entity type, such as a person, an
  // organization, or location.
  message Entity {
    // Provenance of the entity.
    // Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
    TextAnchor text_anchor = 1;

    // Entity type from a schema e.g. `Address`.
    string type = 2;

    // Text value in the document e.g. `1600 Amphitheatre Pkwy`.
    string mention_text = 3;

    // Deprecated. Use `id` field instead.
    string mention_id = 4;

    // Optional. Confidence of detected Schema entity. Range [0, 1].
    float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Represents the provenance of this entity wrt. the location on the
    // page where it was found.
    PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Canonical id. This will be a unique value in the entity list
    // for this document.
    string id = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Temporary field to store the bounding poly for short-term POCs. Used by
    // the frontend only. Do not use before you talk to ybo@ and lukasr@.
    BoundingPoly bounding_poly_for_demo_frontend = 8 [(google.api.field_behavior) = OPTIONAL];
  }

  // Relationship between [Entities][google.cloud.documentai.v1beta2.Document.Entity].
  message EntityRelation {
    // Subject entity id.
    string subject_id = 1;

    // Object entity id.
    string object_id = 2;

    // Relationship description.
    string relation = 3;
  }

  // Text reference indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
  message TextAnchor {
    // A text segment in the [Document.text][google.cloud.documentai.v1beta2.Document.text]. The indices may be out of bounds
    // which indicate that the text extends into another document shard for
    // large sharded documents. See [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset].
    message TextSegment {
      // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] start UTF-8 char index in the [Document.text][google.cloud.documentai.v1beta2.Document.text].
      int64 start_index = 1;

      // [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] half open end UTF-8 char index in the
      // [Document.text][google.cloud.documentai.v1beta2.Document.text].
      int64 end_index = 2;
    }

    // The text segments from the [Document.text][google.cloud.documentai.v1beta2.Document.text].
    repeated TextSegment text_segments = 1;
  }

  // Referencing elements in [Document.pages][google.cloud.documentai.v1beta2.Document.pages].
  message PageAnchor {
    // Represents a weak reference to a page element within a document.
    message PageRef {
      // The type of layout that is being referenced.
      enum LayoutType {
        // Layout Unspecified.
        LAYOUT_TYPE_UNSPECIFIED = 0;

        // References a [Page.blocks][google.cloud.documentai.v1beta2.Document.Page.blocks] element.
        BLOCK = 1;

        // References a [Page.paragraphs][google.cloud.documentai.v1beta2.Document.Page.paragraphs] element.
        PARAGRAPH = 2;

        // References a [Page.lines][google.cloud.documentai.v1beta2.Document.Page.lines] element.
        LINE = 3;

        // References a [Page.tokens][google.cloud.documentai.v1beta2.Document.Page.tokens] element.
        TOKEN = 4;

        // References a [Page.visual_elements][google.cloud.documentai.v1beta2.Document.Page.visual_elements] element.
        VISUAL_ELEMENT = 5;

        // References a [Page.tables][google.cloud.documentai.v1beta2.Document.Page.tables] element.
        TABLE = 6;

        // References a [Page.form_fields][google.cloud.documentai.v1beta2.Document.Page.form_fields] element.
        FORM_FIELD = 7;
      }

      // Required. Index into the [Document.pages][google.cloud.documentai.v1beta2.Document.pages] element.
      int64 page = 1 [(google.api.field_behavior) = REQUIRED];

      // Optional. The type of the layout element that is being referenced. If not
      // specified the whole page is assumed to be referenced.
      LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. The [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id] on the page that this element
      // references. If [LayoutRef.type][] is specified this id must also be
      // specified.
      string layout_id = 3 [(google.api.field_behavior) = OPTIONAL];
    }

    // One or more references to visual page elements.
    repeated PageRef page_refs = 1;
  }

  // Original source document from the user.
  oneof source {
    // Currently supports Google Cloud Storage URI of the form
    // `gs://bucket_name/object_name`. Object versioning is not supported.
    // See [Google Cloud Storage Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris) for more
    // info.
    string uri = 1;

    // Inline document content, represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes content = 2;
  }

  // An IANA published MIME type (also referred to as media type). For more
  // information, see
  // https://www.iana.org/assignments/media-types/media-types.xhtml.
  string mime_type = 3;

  // UTF-8 encoded text in reading order from the document.
  string text = 4;

  // Styles for the [Document.text][google.cloud.documentai.v1beta2.Document.text].
  repeated Style text_styles = 5;

  // Visual page layout for the [Document][google.cloud.documentai.v1beta2.Document].
  repeated Page pages = 6;

  // A list of entities detected on [Document.text][google.cloud.documentai.v1beta2.Document.text]. For document shards,
  // entities in this list may cross shard boundaries.
  repeated Entity entities = 7;

  // Relationship among [Document.entities][google.cloud.documentai.v1beta2.Document.entities].
  repeated EntityRelation entity_relations = 8;

  // Information about the sharding if this document is sharded part of a larger
  // document. If the document is not sharded, this message is not specified.
  ShardInfo shard_info = 9;

  // [Label][google.cloud.documentai.v1beta2.Document.Label]s for this document.
  repeated Label labels = 11;

  // Any error that occurred while processing this document.
  google.rpc.Status error = 10;
}

View File

@ -0,0 +1,343 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/documentai/v1beta2/document.proto";
import "google/cloud/documentai/v1beta2/geometry.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiProto";
option java_package = "com.google.cloud.documentai.v1beta2";
// Service to parse structured information from unstructured or semi-structured
// documents using state-of-the-art Google AI such as natural language,
// computer vision, and translation.
service DocumentUnderstandingService {
  option (google.api.default_host) = "us-documentai.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // LRO endpoint to batch process many documents. The output is written
  // to Cloud Storage as JSON in the [Document] format.
  rpc BatchProcessDocuments(BatchProcessDocumentsRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "BatchProcessDocumentsResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Processes a single document.
  rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
    option (google.api.http) = {
      post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
      body: "*"
      additional_bindings {
        post: "/v1beta2/{parent=projects/*}/documents:process"
        body: "*"
      }
    };
  }
}
// Request to batch process documents as an asynchronous operation. The output
// is written to Cloud Storage as JSON in the [Document] format.
message BatchProcessDocumentsRequest {
  // Required. Individual requests for each document.
  repeated ProcessDocumentRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  string parent = 2;
}
// Request to process one document.
message ProcessDocumentRequest {
  // Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no location is specified, a region will be chosen automatically.
  // This field is only populated when used in ProcessDocument method.
  string parent = 9;

  // Required. Information about the input file.
  InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The desired output location. This field is only needed in
  // BatchProcessDocumentsRequest.
  OutputConfig output_config = 2 [(google.api.field_behavior) = OPTIONAL];

  // Specifies a known document type for deeper structure detection. Valid
  // values are currently "general" and "invoice". If not provided, "general"
  // is used as default. If any other value is given, the request is rejected.
  string document_type = 3;

  // Controls table extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  TableExtractionParams table_extraction_params = 4;

  // Controls form extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  FormExtractionParams form_extraction_params = 5;

  // Controls entity extraction behavior. If not specified, the system will
  // decide reasonable defaults.
  EntityExtractionParams entity_extraction_params = 6;

  // Controls OCR behavior. If not specified, the system will decide reasonable
  // defaults.
  OcrParams ocr_params = 7;

  // Controls AutoML model prediction behavior. AutoMlParams cannot be used
  // together with other Params.
  AutoMlParams automl_params = 8;
}
// Response to a batch document processing request. This is returned in
// the LRO Operation after the operation is complete.
message BatchProcessDocumentsResponse {
  // Responses for each individual document.
  repeated ProcessDocumentResponse responses = 1;
}
// Response to a single document processing request.
message ProcessDocumentResponse {
  // Information about the input file. This is the same as the corresponding
  // input config in the request.
  InputConfig input_config = 1;

  // The output location of the parsed responses. The responses are written to
  // this location as JSON-serialized `Document` objects.
  OutputConfig output_config = 2;
}
// Parameters to control Optical Character Recognition (OCR) behavior.
message OcrParams {
  // List of languages to use for OCR. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Document processing returns an
  // error if one or more of the specified languages is not one of the
  // supported languages.
  repeated string language_hints = 1;
}
// Parameters to control table extraction behavior.
message TableExtractionParams {
  // Whether to enable table extraction.
  bool enabled = 1;

  // Optional. Table bounding box hints that can be provided to complex cases
  // which our algorithm cannot locate the table(s) in.
  repeated TableBoundHint table_bound_hints = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Table header hints. The extraction will bias towards producing
  // these terms as table headers, which may improve accuracy.
  repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];

  // Model version of the table extraction system. Default is "builtin/stable".
  // Specify "builtin/latest" for the latest model.
  string model_version = 4;
}
// A hint for a table bounding box on the page for table parsing.
message TableBoundHint {
  // Optional. Page number for multi-paged inputs this hint applies to. If not
  // provided, this hint will apply to all pages by default. This value is
  // 1-based.
  int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];

  // Bounding box hint for a table on this page. The coordinates must be
  // normalized to [0,1] and the bounding box must be an axis-aligned rectangle.
  BoundingPoly bounding_box = 2;
}
// Parameters to control form extraction behavior.
message FormExtractionParams {
  // Whether to enable form extraction.
  bool enabled = 1;

  // User can provide pairs of (key text, value type) to improve the parsing
  // result.
  //
  // For example, if a document has a field called "Date" that holds a date
  // value and a field called "Amount" that may hold either a currency value
  // (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the
  // following hints: [ {"key": "Date", value_types: [ "DATE"]}, {"key":
  // "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
  //
  // If the value type is unknown, but you want to provide hints for the keys,
  // you can leave the value_types field blank. e.g. {"key": "Date",
  // "value_types": []}
  repeated KeyValuePairHint key_value_pair_hints = 2;

  // Model version of the form extraction system. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  // For custom form models, specify: "custom/{model_name}". Model name
  // format is "bucket_name/path/to/modeldir" corresponding to
  // "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
  string model_version = 3;
}
// User-provided hint for key value pair.
message KeyValuePairHint {
  // The key text for the hint.
  string key = 1;

  // Type of the value. This is case-insensitive, and could be one of:
  // ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
  // ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
  // be ignored.
  repeated string value_types = 2;
}
// Parameters to control entity extraction behavior.
message EntityExtractionParams {
  // Whether to enable entity extraction.
  bool enabled = 1;

  // Model version of the entity extraction. Default is
  // "builtin/stable". Specify "builtin/latest" for the latest model.
  string model_version = 2;
}
// Parameters to control AutoML model prediction behavior.
message AutoMlParams {
  // Resource name of the AutoML model.
  //
  // Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
  string model = 1;
}
// The desired input location and metadata.
message InputConfig {
  // Required. Exactly one source must be set.
  oneof source {
    // The Google Cloud Storage location to read the input from. This must be a
    // single file.
    GcsSource gcs_source = 1;

    // Content in bytes, represented as a stream of bytes.
    // Note: As with all `bytes` fields, proto buffer messages use a pure binary
    // representation, whereas JSON representations use base64.
    //
    // This field only works for synchronous ProcessDocument method.
    bytes contents = 3;
  }

  // Required. Mimetype of the input. Current supported mimetypes are application/pdf,
  // image/tiff, and image/gif.
  // In addition, application/json type is supported for requests with
  // [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params] field set. The JSON file needs to
  // be in [Document][google.cloud.documentai.v1beta2.Document] format.
  string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
}
// The desired output location and metadata.
message OutputConfig {
  // Required. Exactly one destination must be set.
  oneof destination {
    // The Google Cloud Storage location to write the output to.
    GcsDestination gcs_destination = 1;
  }

  // The max number of pages to include into each output Document shard JSON on
  // Google Cloud Storage.
  //
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one pdf file with 100 pages, 100 parsed pages will be
  // produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
  // containing 20 parsed pages will be written under the prefix
  // [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where
  // x and y are 1-indexed page numbers.
  //
  // Example GCS outputs with 157 pages and pages_per_shard = 50:
  //
  // <prefix>pages-001-to-050.json
  // <prefix>pages-051-to-100.json
  // <prefix>pages-101-to-150.json
  // <prefix>pages-151-to-157.json
  int32 pages_per_shard = 2;
}
// The Google Cloud Storage location where the input file will be read from.
message GcsSource {
  // Required. The Google Cloud Storage URI of the input file,
  // e.g. `gs://bucket-name/path/to/file`.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}
// The Google Cloud Storage location where the output file will be written to.
message GcsDestination {
  // Required. The Google Cloud Storage URI prefix under which output is
  // written, e.g. `gs://bucket-name/path/`.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}
// Contains metadata for the BatchProcessDocuments operation.
message OperationMetadata {
  // Lifecycle state of a batch processing operation.
  // NOTE(review): values are unprefixed (e.g. `ACCEPTED` rather than
  // `STATE_ACCEPTED`); this is frozen in the published v1beta2 API and cannot
  // be changed without breaking generated code.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    ACCEPTED = 1;

    // Request operation is waiting for scheduling.
    WAITING = 2;

    // Request is being processed.
    RUNNING = 3;

    // The batch processing completed successfully.
    SUCCEEDED = 4;

    // The batch processing was cancelled.
    CANCELLED = 5;

    // The batch processing has failed.
    FAILED = 6;
  }

  // The state of the current batch processing.
  State state = 1;

  // A message providing more details about the current state of processing.
  string state_message = 2;

  // The creation time of the operation.
  google.protobuf.Timestamp create_time = 3;

  // The last update time of the operation.
  google.protobuf.Timestamp update_time = 4;
}

View File

@ -0,0 +1,118 @@
type: com.google.api.codegen.ConfigProto
config_schema_version: 1.0.0
# The settings of generated code in a specific language.
language_settings:
  java:
    package_name: com.google.cloud.documentai.v1beta2
  python:
    package_name: google.cloud.documentai_v1beta2.gapic
  go:
    package_name: cloud.google.com/go/documentai/apiv1beta2
  csharp:
    package_name: Google.Cloud.DocumentAi.V1beta2
  ruby:
    package_name: Google::Cloud::DocumentAi::V1beta2
  php:
    package_name: Google\Cloud\DocumentAi\V1beta2
  nodejs:
    package_name: documentai.v1beta2
# A list of API interface configurations.
interfaces:
  # The fully qualified name of the API interface.
- name: google.cloud.documentai.v1beta2.DocumentUnderstandingService
  # A list of resource collection configurations.
  # Consists of a name_pattern and an entity_name.
  # The name_pattern is a pattern to describe the names of the resources of this
  # collection, using the platform's conventions for URI patterns. A generator
  # may use this to generate methods to compose and decompose such names. The
  # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`;
  # those will be taken as hints for the parameter names of the generated
  # methods. If empty, no name methods are generated.
  # The entity_name is the name to be used as a basis for generated methods and
  # classes.
  collections: []
  # Definition for retryable codes.
  retry_codes_def:
  - name: idempotent
    retry_codes:
    - DEADLINE_EXCEEDED
    - UNAVAILABLE
  - name: non_idempotent
    retry_codes: []
  # Definition for retry/backoff parameters.
  retry_params_def:
  - name: default
    initial_retry_delay_millis: 100
    retry_delay_multiplier: 1.3
    max_retry_delay_millis: 60000
    initial_rpc_timeout_millis: 20000
    rpc_timeout_multiplier: 1
    max_rpc_timeout_millis: 20000
    total_timeout_millis: 600000
  # A list of method configurations.
  # Common properties:
  #
  # name - The simple name of the method.
  #
  # flattening - Specifies the configuration for parameter flattening.
  # Describes the parameter groups for which a generator should produce method
  # overloads which allow a client to directly pass request message fields as
  # method parameters. This information may or may not be used, depending on
  # the target language.
  # Consists of groups, which each represent a list of parameters to be
  # flattened. Each parameter listed must be a field of the request message.
  #
  # required_fields - Fields that are always required for a request to be
  # valid.
  #
  # page_streaming - Specifies the configuration for paging.
  # Describes information for generating a method which transforms a paging
  # list RPC into a stream of resources.
  # Consists of a request and a response.
  # The request specifies request information of the list method. It defines
  # which fields match the paging pattern in the request. The request consists
  # of a page_size_field and a token_field. The page_size_field is the name of
  # the optional field specifying the maximum number of elements to be
  # returned in the response. The token_field is the name of the field in the
  # request containing the page token.
  # The response specifies response information of the list method. It defines
  # which fields match the paging pattern in the response. The response
  # consists of a token_field and a resources_field. The token_field is the
  # name of the field in the response containing the next page token. The
  # resources_field is the name of the field in the response containing the
  # list of resources belonging to the page.
  #
  # retry_codes_name - Specifies the configuration for retryable codes. The
  # name must be defined in interfaces.retry_codes_def.
  #
  # retry_params_name - Specifies the configuration for retry/backoff
  # parameters. The name must be defined in interfaces.retry_params_def.
  #
  # field_name_patterns - Maps the field name of the request type to
  # entity_name of interfaces.collections.
  # Specifies the string pattern that the field must follow.
  #
  # timeout_millis - Specifies the default timeout for a non-retrying call. If
  # the call is retrying, refer to retry_params_name instead.
  methods:
  - name: BatchProcessDocuments
    flattening:
      groups:
      - parameters:
        - requests
    required_fields:
    - requests
    retry_codes_name: idempotent
    retry_params_name: default
    long_running:
      return_type: google.cloud.documentai.v1beta2.BatchProcessDocumentsResponse
      metadata_type: google.cloud.documentai.v1beta2.OperationMetadata
      initial_poll_delay_millis: 20000
      poll_delay_multiplier: 1.5
      max_poll_delay_millis: 45000
      total_poll_timeout_millis: 86400000
    timeout_millis: 60000
  - name: ProcessDocument
    retry_codes_name: idempotent
    retry_params_name: default
    timeout_millis: 60000

View File

@ -0,0 +1,33 @@
type: google.api.Service
config_version: 3
name: documentai.googleapis.com
title: Cloud Document AI API

apis:
- name: google.cloud.documentai.v1beta2.DocumentUnderstandingService

types:
- name: google.cloud.documentai.v1beta2.BatchProcessDocumentsResponse
- name: google.cloud.documentai.v1beta2.Document
- name: google.cloud.documentai.v1beta2.OperationMetadata

documentation:
  summary: |-
    Service to parse structured information from unstructured or
    semi-structured documents using state-of-the-art Google AI such as natural
    language, computer vision, translation, and AutoML.

authentication:
  rules:
  - selector: google.cloud.documentai.v1beta2.DocumentUnderstandingService.BatchProcessDocuments
    oauth:
      canonical_scopes: |-
        https://www.googleapis.com/auth/cloud-platform
  - selector: google.cloud.documentai.v1beta2.DocumentUnderstandingService.ProcessDocument
    oauth:
      canonical_scopes: |-
        https://www.googleapis.com/auth/cloud-platform
  - selector: 'google.longrunning.Operations.*'
    oauth:
      canonical_scopes: |-
        https://www.googleapis.com/auth/cloud-platform

View File

@ -0,0 +1,26 @@
{
"methodConfig": [
{
"name": [
{
"service": "google.cloud.documentai.v1beta2.DocumentUnderstandingService",
"method": "BatchProcessDocuments"
},
{
"service": "google.cloud.documentai.v1beta2.DocumentUnderstandingService",
"method": "ProcessDocument"
}
],
"timeout": "60s",
"retryPolicy": {
"initialBackoff": "0.100s",
"maxBackoff": "60s",
"backoffMultiplier": 1.3,
"retryableStatusCodes": [
"DEADLINE_EXCEEDED",
"UNAVAILABLE"
]
}
}
]
}

View File

@ -0,0 +1,54 @@
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.documentai.v1beta2;
import "google/api/annotations.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
option java_multiple_files = true;
option java_outer_classname = "GeometryProto";
option java_package = "com.google.cloud.documentai.v1beta2";
// A vertex represents a 2D point in the image.
// NOTE: the vertex coordinates are in the same scale as the original image.
message Vertex {
  // X coordinate, in pixels of the original image.
  int32 x = 1;

  // Y coordinate, in pixels of the original image.
  int32 y = 2;
}
// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate, normalized to the image width (range [0, 1]).
  float x = 1;

  // Y coordinate, normalized to the image height (range [0, 1]).
  float y = 2;
}
// A bounding polygon for the detected image annotation.
// Either pixel-space `vertices` or `normalized_vertices` may be populated;
// consumers should handle both representations.
message BoundingPoly {
  // The bounding polygon vertices, in absolute image pixel coordinates.
  repeated Vertex vertices = 1;

  // The bounding polygon normalized vertices, with coordinates in [0, 1].
  repeated NormalizedVertex normalized_vertices = 2;
}