feat: publish documentai/v1beta2 protos
PiperOrigin-RevId: 300656808
This commit is contained in:
parent
5202a9e0d9
commit
c6fbac11af
|
|
@ -0,0 +1,372 @@
|
|||
# This file was automatically generated by BuildFileGenerator

# This is an API workspace, having public visibility by default makes perfect sense.
package(default_visibility = ["//visibility:public"])

##############################################################################
# Common
##############################################################################
load("@rules_proto//proto:defs.bzl", "proto_library")
load("@com_google_googleapis_imports//:imports.bzl", "proto_library_with_info")

# Language-neutral proto target built from the three v1beta2 .proto sources.
# The per-language proto/grpc rules further down this file depend on it.
proto_library(
    name = "documentai_proto",
    srcs = [
        "document.proto",
        "document_understanding.proto",
        "geometry.proto",
    ],
    deps = [
        "//google/api:annotations_proto",
        "//google/api:client_proto",
        "//google/api:field_behavior_proto",
        "//google/longrunning:operations_proto",
        "//google/rpc:status_proto",
        "//google/type:color_proto",
        "@com_google_protobuf//:timestamp_proto",
    ],
)

# Combines :documentai_proto with //google/cloud:common_resources_proto.
# Every *_gapic_library rule in this file uses this target as its `src`.
proto_library_with_info(
    name = "documentai_proto_with_info",
    deps = [
        ":documentai_proto",
        "//google/cloud:common_resources_proto",
    ],
)
|
||||
|
||||
##############################################################################
# Java
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "java_gapic_assembly_gradle_pkg",
    "java_gapic_library",
    "java_gapic_test",
    "java_grpc_library",
    "java_proto_library",
)

# Java message classes for :documentai_proto.
java_proto_library(
    name = "documentai_java_proto",
    deps = [":documentai_proto"],
)

# Java gRPC stubs for the services declared in :documentai_proto.
java_grpc_library(
    name = "documentai_java_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_java_proto"],
)

# Java GAPIC client library, configured by the gapic and service YAML files.
java_gapic_library(
    name = "documentai_java_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    test_deps = [
        ":documentai_java_grpc",
    ],
    deps = [
        ":documentai_java_proto",
    ],
)

# NOTE(review): ":documentai_java_gapic_test" is not declared in this file;
# presumably it is emitted by the java_gapic_library macro above - confirm.
java_gapic_test(
    name = "documentai_java_gapic_test_suite",
    test_classes = [
        "com.google.cloud.documentai.v1beta2.DocumentUnderstandingServiceClientTest",
    ],
    runtime_deps = [":documentai_java_gapic_test"],
)

# Open Source Packages
java_gapic_assembly_gradle_pkg(
    name = "google-cloud-documentai-v1beta2-java",
    deps = [
        ":documentai_java_gapic",
        ":documentai_java_grpc",
        ":documentai_java_proto",
        ":documentai_proto",
    ],
)
|
||||
|
||||
##############################################################################
# Go
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "go_gapic_assembly_pkg",
    "go_gapic_library",
    "go_proto_library",
    "go_test",
)

# Go messages and gRPC stubs (go_grpc compiler) for :documentai_proto.
go_proto_library(
    name = "documentai_go_proto",
    compilers = ["@io_bazel_rules_go//proto:go_grpc"],
    importpath = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2",
    protos = [":documentai_proto"],
    deps = [
        "//google/api:annotations_go_proto",
        "//google/longrunning:longrunning_go_proto",
        "//google/rpc:status_go_proto",
        "//google/type:color_go_proto",
    ],
)

# Go GAPIC client library, configured by the gapic and service YAML files.
go_gapic_library(
    name = "documentai_go_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    importpath = "cloud.google.com/go/documentai/apiv1beta2",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_go_proto",
        "//google/longrunning:longrunning_go_gapic",
        "//google/longrunning:longrunning_go_proto",
        "@com_google_cloud_go//longrunning:go_default_library",
    ],
)

# NOTE(review): ":documentai_go_gapic_srcjar_test" is not declared in this
# file; presumably it is emitted by the go_gapic_library macro - confirm.
go_test(
    name = "documentai_go_gapic_test",
    srcs = [":documentai_go_gapic_srcjar_test"],
    embed = [":documentai_go_gapic"],
    importpath = "cloud.google.com/go/documentai/apiv1beta2",
)

# Open Source Packages
# NOTE(review): the two "*.srcjar" labels below are likewise not declared in
# this file; presumably outputs of the go_gapic_library macro - confirm.
go_gapic_assembly_pkg(
    name = "gapi-cloud-documentai-v1beta2-go",
    deps = [
        ":documentai_go_gapic",
        ":documentai_go_gapic_srcjar-smoke-test.srcjar",
        ":documentai_go_gapic_srcjar-test.srcjar",
        ":documentai_go_proto",
    ],
)
|
||||
|
||||
##############################################################################
# Python
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "moved_proto_library",
    "py_gapic_assembly_pkg",
    "py_gapic_library",
    "py_grpc_library",
    "py_proto_library",
)

# The Python proto and gRPC rules below consume this target rather than
# :documentai_proto directly. Its deps list duplicates the deps of
# :documentai_proto, so the two lists must be kept in sync.
moved_proto_library(
    name = "documentai_moved_proto",
    srcs = [":documentai_proto"],
    deps = [
        "//google/api:annotations_proto",
        "//google/api:client_proto",
        "//google/api:field_behavior_proto",
        "//google/longrunning:operations_proto",
        "//google/rpc:status_proto",
        "//google/type:color_proto",
        "@com_google_protobuf//:timestamp_proto",
    ],
)

# Python message modules, generated with the protoc docs plugin.
py_proto_library(
    name = "documentai_py_proto",
    plugin = "@protoc_docs_plugin//:docs_plugin",
    deps = [":documentai_moved_proto"],
)

# Python gRPC stubs.
py_grpc_library(
    name = "documentai_py_grpc",
    srcs = [":documentai_moved_proto"],
    deps = [":documentai_py_proto"],
)

# Python GAPIC client library, configured by the gapic and service YAML files.
py_gapic_library(
    name = "documentai_py_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_py_grpc",
        ":documentai_py_proto",
    ],
)

# Open Source Packages
py_gapic_assembly_pkg(
    name = "documentai-v1beta2-py",
    deps = [
        ":documentai_py_gapic",
        ":documentai_py_grpc",
        ":documentai_py_proto",
    ],
)
|
||||
|
||||
##############################################################################
# PHP
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "php_gapic_assembly_pkg",
    "php_gapic_library",
    "php_grpc_library",
    "php_proto_library",
)

# PHP message classes for :documentai_proto.
php_proto_library(
    name = "documentai_php_proto",
    deps = [":documentai_proto"],
)

# PHP gRPC stubs.
php_grpc_library(
    name = "documentai_php_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_php_proto"],
)

# PHP GAPIC client library, configured by the gapic and service YAML files.
php_gapic_library(
    name = "documentai_php_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_php_grpc",
        ":documentai_php_proto",
    ],
)

# Open Source Packages
php_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-php",
    deps = [
        ":documentai_php_gapic",
        ":documentai_php_grpc",
        ":documentai_php_proto",
    ],
)
|
||||
|
||||
##############################################################################
# Node.js
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "nodejs_gapic_assembly_pkg",
    "nodejs_gapic_library",
)

# Node.js GAPIC client library. Unlike the other languages in this file, no
# separate proto/grpc targets are declared for Node.js and `deps` is empty.
nodejs_gapic_library(
    name = "documentai_nodejs_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [],
)

# Packages the GAPIC library together with the raw :documentai_proto target.
nodejs_gapic_assembly_pkg(
    name = "documentai-v1beta2-nodejs",
    deps = [
        ":documentai_nodejs_gapic",
        ":documentai_proto",
    ],
)
|
||||
|
||||
##############################################################################
# Ruby
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "ruby_gapic_assembly_pkg",
    "ruby_gapic_library",
    "ruby_grpc_library",
    "ruby_proto_library",
)

# Ruby message classes for :documentai_proto.
ruby_proto_library(
    name = "documentai_ruby_proto",
    deps = [":documentai_proto"],
)

# Ruby gRPC stubs.
ruby_grpc_library(
    name = "documentai_ruby_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_ruby_proto"],
)

# Ruby GAPIC client library, configured by the gapic and service YAML files.
ruby_gapic_library(
    name = "documentai_ruby_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_ruby_grpc",
        ":documentai_ruby_proto",
    ],
)

# Open Source Packages
ruby_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-ruby",
    deps = [
        ":documentai_ruby_gapic",
        ":documentai_ruby_grpc",
        ":documentai_ruby_proto",
    ],
)
|
||||
|
||||
##############################################################################
# C#
##############################################################################
load(
    "@com_google_googleapis_imports//:imports.bzl",
    "csharp_gapic_assembly_pkg",
    "csharp_gapic_library",
    "csharp_grpc_library",
    "csharp_proto_library",
)

# C# message classes for :documentai_proto.
csharp_proto_library(
    name = "documentai_csharp_proto",
    deps = [":documentai_proto"],
)

# C# gRPC stubs.
csharp_grpc_library(
    name = "documentai_csharp_grpc",
    srcs = [":documentai_proto"],
    deps = [":documentai_csharp_proto"],
)

# C# GAPIC client library, configured by the gapic and service YAML files.
csharp_gapic_library(
    name = "documentai_csharp_gapic",
    src = ":documentai_proto_with_info",
    gapic_yaml = "documentai_gapic.yaml",
    package = "google.cloud.documentai.v1beta2",
    service_yaml = "documentai_v1beta2.yaml",
    deps = [
        ":documentai_csharp_grpc",
        ":documentai_csharp_proto",
    ],
)

# Open Source Packages
csharp_gapic_assembly_pkg(
    name = "google-cloud-documentai-v1beta2-csharp",
    deps = [
        ":documentai_csharp_gapic",
        ":documentai_csharp_grpc",
        ":documentai_csharp_proto",
    ],
)
|
||||
|
||||
##############################################################################
|
||||
# C++
|
||||
##############################################################################
|
||||
# Put your C++ rules here
|
||||
|
|
@ -0,0 +1,516 @@
|
|||
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.documentai.v1beta2;
|
||||
|
||||
import "google/api/field_behavior.proto";
|
||||
import "google/cloud/documentai/v1beta2/geometry.proto";
|
||||
import "google/rpc/status.proto";
|
||||
import "google/type/color.proto";
|
||||
import "google/api/annotations.proto";
|
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
|
||||
option java_multiple_files = true;
|
||||
option java_outer_classname = "DocumentProto";
|
||||
option java_package = "com.google.cloud.documentai.v1beta2";
|
||||
|
||||
// Document represents the canonical document resource in Document Understanding
|
||||
// AI.
|
||||
// It is an interchange format that provides insights into documents and allows
|
||||
// for collaboration between users and Document Understanding AI to iterate and
|
||||
// optimize for quality.
|
||||
message Document {
|
||||
// For a large document, sharding may be performed to produce several
|
||||
// document shards. Each document shard contains this field to detail which
|
||||
// shard it is.
|
||||
message ShardInfo {
|
||||
// The 0-based index of this shard.
|
||||
int64 shard_index = 1;
|
||||
|
||||
// Total number of shards.
|
||||
int64 shard_count = 2;
|
||||
|
||||
// The index of the first character in [Document.text][google.cloud.documentai.v1beta2.Document.text] in the overall
|
||||
// document global text.
|
||||
int64 text_offset = 3;
|
||||
}
|
||||
|
||||
// Label attaches schema information and/or other metadata to segments within
|
||||
// a [Document][google.cloud.documentai.v1beta2.Document]. Multiple [Label][google.cloud.documentai.v1beta2.Document.Label]s on a single field can denote either
|
||||
// different labels, different instances of the same label created at
|
||||
// different times, or some combination of both.
|
||||
message Label {
|
||||
// Provenance of the label.
|
||||
oneof source {
|
||||
// Label is generated AutoML model. This field stores the full resource
|
||||
// name of the AutoML model.
|
||||
//
|
||||
// Format:
|
||||
// `projects/{project-id}/locations/{location-id}/models/{model-id}`
|
||||
string automl_model = 2;
|
||||
}
|
||||
|
||||
// Name of the label.
|
||||
//
|
||||
// When the label is generated from AutoML Text Classification model, this
|
||||
// field represents the name of the category.
|
||||
string name = 1;
|
||||
|
||||
// Confidence score between 0 and 1 for label assignment.
|
||||
float confidence = 3;
|
||||
}
|
||||
|
||||
// Annotation for common text style attributes. This adheres to CSS
|
||||
// conventions as much as possible.
|
||||
message Style {
|
||||
// Font size with unit.
|
||||
message FontSize {
|
||||
// Font size for the text.
|
||||
float size = 1;
|
||||
|
||||
// Unit for the font size. Follows CSS naming (in, px, pt, etc.).
|
||||
string unit = 2;
|
||||
}
|
||||
|
||||
// Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
TextAnchor text_anchor = 1;
|
||||
|
||||
// Text color.
|
||||
google.type.Color color = 2;
|
||||
|
||||
// Text background color.
|
||||
google.type.Color background_color = 3;
|
||||
|
||||
// Font weight. Possible values are normal, bold, bolder, and lighter.
|
||||
// https://www.w3schools.com/cssref/pr_font_weight.asp
|
||||
string font_weight = 4;
|
||||
|
||||
// Text style. Possible values are normal, italic, and oblique.
|
||||
// https://www.w3schools.com/cssref/pr_font_font-style.asp
|
||||
string text_style = 5;
|
||||
|
||||
// Text decoration. Follows CSS standard.
|
||||
// <text-decoration-line> <text-decoration-color> <text-decoration-style>
|
||||
// https://www.w3schools.com/cssref/pr_text_text-decoration.asp
|
||||
string text_decoration = 6;
|
||||
|
||||
// Font size.
|
||||
FontSize font_size = 7;
|
||||
}
|
||||
|
||||
// A page in a [Document][google.cloud.documentai.v1beta2.Document].
|
||||
message Page {
|
||||
// Dimension for the page.
|
||||
message Dimension {
|
||||
// Page width.
|
||||
float width = 1;
|
||||
|
||||
// Page height.
|
||||
float height = 2;
|
||||
|
||||
// Dimension unit.
|
||||
string unit = 3;
|
||||
}
|
||||
|
||||
// Visual element describing a layout unit on a page.
|
||||
message Layout {
|
||||
// Detected human reading orientation.
|
||||
enum Orientation {
|
||||
// Unspecified orientation.
|
||||
ORIENTATION_UNSPECIFIED = 0;
|
||||
|
||||
// Orientation is aligned with page up.
|
||||
PAGE_UP = 1;
|
||||
|
||||
// Orientation is aligned with page right.
|
||||
// Turn the head 90 degrees clockwise from upright to read.
|
||||
PAGE_RIGHT = 2;
|
||||
|
||||
// Orientation is aligned with page down.
|
||||
// Turn the head 180 degrees from upright to read.
|
||||
PAGE_DOWN = 3;
|
||||
|
||||
// Orientation is aligned with page left.
|
||||
// Turn the head 90 degrees counterclockwise from upright to read.
|
||||
PAGE_LEFT = 4;
|
||||
}
|
||||
|
||||
// Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
TextAnchor text_anchor = 1;
|
||||
|
||||
// Confidence of the current [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] within context of the object this
|
||||
// layout is for. e.g. confidence can be for a single token, a table,
|
||||
// a visual element, etc. depending on context. Range [0, 1].
|
||||
float confidence = 2;
|
||||
|
||||
// The bounding polygon for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
|
||||
BoundingPoly bounding_poly = 3;
|
||||
|
||||
// Detected orientation for the [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout].
|
||||
Orientation orientation = 4;
|
||||
|
||||
// Optional. This is the identifier used by referencing [PageAnchor][google.cloud.documentai.v1beta2.Document.PageAnchor]s.
|
||||
string id = 5 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// A block has a set of lines (collected into paragraphs) that have a
|
||||
// common line-spacing and orientation.
|
||||
message Block {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Block][google.cloud.documentai.v1beta2.Document.Page.Block].
|
||||
Layout layout = 1;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 2;
|
||||
}
|
||||
|
||||
// A collection of lines that a human would perceive as a paragraph.
|
||||
message Paragraph {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Paragraph][google.cloud.documentai.v1beta2.Document.Page.Paragraph].
|
||||
Layout layout = 1;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 2;
|
||||
}
|
||||
|
||||
// A collection of tokens that a human would perceive as a line.
|
||||
// Does not cross column boundaries, can be horizontal, vertical, etc.
|
||||
message Line {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Line][google.cloud.documentai.v1beta2.Document.Page.Line].
|
||||
Layout layout = 1;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 2;
|
||||
}
|
||||
|
||||
// A detected token.
|
||||
message Token {
|
||||
// Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
|
||||
message DetectedBreak {
|
||||
// Enum to denote the type of break found.
|
||||
enum Type {
|
||||
// Unspecified break type.
|
||||
TYPE_UNSPECIFIED = 0;
|
||||
|
||||
// A single whitespace.
|
||||
SPACE = 1;
|
||||
|
||||
// A wider whitespace.
|
||||
WIDE_SPACE = 2;
|
||||
|
||||
// A hyphen that indicates that a token has been split across lines.
|
||||
HYPHEN = 3;
|
||||
}
|
||||
|
||||
// Detected break type.
|
||||
Type type = 1;
|
||||
}
|
||||
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
|
||||
Layout layout = 1;
|
||||
|
||||
// Detected break at the end of a [Token][google.cloud.documentai.v1beta2.Document.Page.Token].
|
||||
DetectedBreak detected_break = 2;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 3;
|
||||
}
|
||||
|
||||
// Detected non-text visual elements e.g. checkbox, signature etc. on the
|
||||
// page.
|
||||
message VisualElement {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
|
||||
Layout layout = 1;
|
||||
|
||||
// Type of the [VisualElement][google.cloud.documentai.v1beta2.Document.Page.VisualElement].
|
||||
string type = 2;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 3;
|
||||
}
|
||||
|
||||
// A table representation similar to HTML table structure.
|
||||
message Table {
|
||||
// A row of table cells.
|
||||
message TableRow {
|
||||
// Cells that make up this row.
|
||||
repeated TableCell cells = 1;
|
||||
}
|
||||
|
||||
// A cell representation inside the table.
|
||||
message TableCell {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [TableCell][google.cloud.documentai.v1beta2.Document.Page.Table.TableCell].
|
||||
Layout layout = 1;
|
||||
|
||||
// How many rows this cell spans.
|
||||
int32 row_span = 2;
|
||||
|
||||
// How many columns this cell spans.
|
||||
int32 col_span = 3;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 4;
|
||||
}
|
||||
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for [Table][google.cloud.documentai.v1beta2.Document.Page.Table].
|
||||
Layout layout = 1;
|
||||
|
||||
// Header rows of the table.
|
||||
repeated TableRow header_rows = 2;
|
||||
|
||||
// Body rows of the table.
|
||||
repeated TableRow body_rows = 3;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 4;
|
||||
}
|
||||
|
||||
// A form field detected on the page.
|
||||
message FormField {
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] name. e.g. `Address`, `Email`,
|
||||
// `Grand total`, `Phone number`, etc.
|
||||
Layout field_name = 1;
|
||||
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the [FormField][google.cloud.documentai.v1beta2.Document.Page.FormField] value.
|
||||
Layout field_value = 2;
|
||||
|
||||
// A list of detected languages for name together with confidence.
|
||||
repeated DetectedLanguage name_detected_languages = 3;
|
||||
|
||||
// A list of detected languages for value together with confidence.
|
||||
repeated DetectedLanguage value_detected_languages = 4;
|
||||
|
||||
// If the value is non-textual, this field represents the type. Current
|
||||
// valid values are:
|
||||
// - blank (this indicates the field_value is normal text)
|
||||
// - "unfilled_checkbox"
|
||||
// - "filled_checkbox"
|
||||
string value_type = 5;
|
||||
|
||||
// An internal field, created for Labeling UI to export key text.
|
||||
string corrected_key_text = 6;
|
||||
|
||||
// An internal field, created for Labeling UI to export value text.
|
||||
string corrected_value_text = 7;
|
||||
}
|
||||
|
||||
// Detected language for a structural component.
|
||||
message DetectedLanguage {
|
||||
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
|
||||
// information, see
|
||||
// http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
|
||||
string language_code = 1;
|
||||
|
||||
// Confidence of detected language. Range [0, 1].
|
||||
float confidence = 2;
|
||||
}
|
||||
|
||||
// 1-based index for current [Page][google.cloud.documentai.v1beta2.Document.Page] in a parent [Document][google.cloud.documentai.v1beta2.Document].
|
||||
// Useful when a page is taken out of a [Document][google.cloud.documentai.v1beta2.Document] for individual
|
||||
// processing.
|
||||
int32 page_number = 1;
|
||||
|
||||
// Physical dimension of the page.
|
||||
Dimension dimension = 2;
|
||||
|
||||
// [Layout][google.cloud.documentai.v1beta2.Document.Page.Layout] for the page.
|
||||
Layout layout = 3;
|
||||
|
||||
// A list of detected languages together with confidence.
|
||||
repeated DetectedLanguage detected_languages = 4;
|
||||
|
||||
// A list of visually detected text blocks on the page.
|
||||
// A block has a set of lines (collected into paragraphs) that have a common
|
||||
// line-spacing and orientation.
|
||||
repeated Block blocks = 5;
|
||||
|
||||
// A list of visually detected text paragraphs on the page.
|
||||
// A collection of lines that a human would perceive as a paragraph.
|
||||
repeated Paragraph paragraphs = 6;
|
||||
|
||||
// A list of visually detected text lines on the page.
|
||||
// A collection of tokens that a human would perceive as a line.
|
||||
repeated Line lines = 7;
|
||||
|
||||
// A list of visually detected tokens on the page.
|
||||
repeated Token tokens = 8;
|
||||
|
||||
// A list of detected non-text visual elements e.g. checkbox,
|
||||
// signature etc. on the page.
|
||||
repeated VisualElement visual_elements = 9;
|
||||
|
||||
// A list of visually detected tables on the page.
|
||||
repeated Table tables = 10;
|
||||
|
||||
// A list of visually detected form fields on the page.
|
||||
repeated FormField form_fields = 11;
|
||||
}
|
||||
|
||||
// A phrase in the text that is a known entity type, such as a person, an
|
||||
// organization, or location.
|
||||
message Entity {
|
||||
// Provenance of the entity.
|
||||
// Text anchor indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
TextAnchor text_anchor = 1;
|
||||
|
||||
// Entity type from a schema e.g. `Address`.
|
||||
string type = 2;
|
||||
|
||||
// Text value in the document e.g. `1600 Amphitheatre Pkwy`.
|
||||
string mention_text = 3;
|
||||
|
||||
// Deprecated. Use `id` field instead.
|
||||
string mention_id = 4;
|
||||
|
||||
// Optional. Confidence of detected Schema entity. Range [0, 1].
|
||||
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Represents the provenance of this entity wrt. the location on the
|
||||
// page where it was found.
|
||||
PageAnchor page_anchor = 6 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Canonical id. This will be a unique value in the entity list
|
||||
// for this document.
|
||||
string id = 7 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Temporary field to store the bounding poly for short-term POCs. Used by
|
||||
// the frontend only. Do not use before you talk to ybo@ and lukasr@.
|
||||
BoundingPoly bounding_poly_for_demo_frontend = 8 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// Relationship between [Entities][google.cloud.documentai.v1beta2.Document.Entity].
|
||||
message EntityRelation {
|
||||
// Subject entity id.
|
||||
string subject_id = 1;
|
||||
|
||||
// Object entity id.
|
||||
string object_id = 2;
|
||||
|
||||
// Relationship description.
|
||||
string relation = 3;
|
||||
}
|
||||
|
||||
// Text reference indexing into the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
message TextAnchor {
|
||||
// A text segment in the [Document.text][google.cloud.documentai.v1beta2.Document.text]. The indices may be out of bounds
|
||||
// which indicate that the text extends into another document shard for
|
||||
// large sharded documents. See [ShardInfo.text_offset][google.cloud.documentai.v1beta2.Document.ShardInfo.text_offset]
|
||||
message TextSegment {
|
||||
// [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] start UTF-8 char index in the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
int64 start_index = 1;
|
||||
|
||||
// [TextSegment][google.cloud.documentai.v1beta2.Document.TextAnchor.TextSegment] half open end UTF-8 char index in the
|
||||
// [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
int64 end_index = 2;
|
||||
}
|
||||
|
||||
// The text segments from the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
repeated TextSegment text_segments = 1;
|
||||
}
|
||||
|
||||
// Referencing elements in [Document.pages][google.cloud.documentai.v1beta2.Document.pages].
|
||||
message PageAnchor {
|
||||
// Represents a weak reference to a page element within a document.
|
||||
message PageRef {
|
||||
// The type of layout that is being referenced.
|
||||
enum LayoutType {
|
||||
// Layout Unspecified.
|
||||
LAYOUT_TYPE_UNSPECIFIED = 0;
|
||||
|
||||
// References a [Page.blocks][google.cloud.documentai.v1beta2.Document.Page.blocks] element.
|
||||
BLOCK = 1;
|
||||
|
||||
// References a [Page.paragraphs][google.cloud.documentai.v1beta2.Document.Page.paragraphs] element.
|
||||
PARAGRAPH = 2;
|
||||
|
||||
// References a [Page.lines][google.cloud.documentai.v1beta2.Document.Page.lines] element.
|
||||
LINE = 3;
|
||||
|
||||
// References a [Page.tokens][google.cloud.documentai.v1beta2.Document.Page.tokens] element.
|
||||
TOKEN = 4;
|
||||
|
||||
// References a [Page.visual_elements][google.cloud.documentai.v1beta2.Document.Page.visual_elements] element.
|
||||
VISUAL_ELEMENT = 5;
|
||||
|
||||
// Refrrences a [Page.tables][google.cloud.documentai.v1beta2.Document.Page.tables] element.
|
||||
TABLE = 6;
|
||||
|
||||
// References a [Page.form_fields][google.cloud.documentai.v1beta2.Document.Page.form_fields] element.
|
||||
FORM_FIELD = 7;
|
||||
}
|
||||
|
||||
// Required. Index into the [Document.pages][google.cloud.documentai.v1beta2.Document.pages] element
|
||||
int64 page = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Optional. The type of the layout element that is being referenced. If not
|
||||
// specified the whole page is assumed to be referenced.
|
||||
LayoutType layout_type = 2 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. The [Page.Layout.id][google.cloud.documentai.v1beta2.Document.Page.Layout.id] on the page that this element
|
||||
// references. If [LayoutRef.type][] is specified this id must also be
|
||||
// specified.
|
||||
string layout_id = 3 [(google.api.field_behavior) = OPTIONAL];
|
||||
}
|
||||
|
||||
// One or more references to visual page elements
|
||||
repeated PageRef page_refs = 1;
|
||||
}
|
||||
|
||||
// Original source document from the user.
|
||||
oneof source {
|
||||
// Currently supports Google Cloud Storage URI of the form
|
||||
// `gs://bucket_name/object_name`. Object versioning is not supported.
|
||||
// See [Google Cloud Storage Request
|
||||
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
|
||||
// info.
|
||||
string uri = 1;
|
||||
|
||||
// Inline document content, represented as a stream of bytes.
|
||||
// Note: As with all `bytes` fields, protobuffers use a pure binary
|
||||
// representation, whereas JSON representations use base64.
|
||||
bytes content = 2;
|
||||
}
|
||||
|
||||
// An IANA published MIME type (also referred to as media type). For more
|
||||
// information, see
|
||||
// https://www.iana.org/assignments/media-types/media-types.xhtml.
|
||||
string mime_type = 3;
|
||||
|
||||
// UTF-8 encoded text in reading order from the document.
|
||||
string text = 4;
|
||||
|
||||
// Styles for the [Document.text][google.cloud.documentai.v1beta2.Document.text].
|
||||
repeated Style text_styles = 5;
|
||||
|
||||
// Visual page layout for the [Document][google.cloud.documentai.v1beta2.Document].
|
||||
repeated Page pages = 6;
|
||||
|
||||
// A list of entities detected on [Document.text][google.cloud.documentai.v1beta2.Document.text]. For document shards,
|
||||
// entities in this list may cross shard boundaries.
|
||||
repeated Entity entities = 7;
|
||||
|
||||
// Relationship among [Document.entities][google.cloud.documentai.v1beta2.Document.entities].
|
||||
repeated EntityRelation entity_relations = 8;
|
||||
|
||||
// Information about the sharding if this document is a sharded part of a larger
|
||||
// document. If the document is not sharded, this message is not specified.
|
||||
ShardInfo shard_info = 9;
|
||||
|
||||
// [Label][google.cloud.documentai.v1beta2.Document.Label]s for this document.
|
||||
repeated Label labels = 11;
|
||||
|
||||
// Any error that occurred while processing this document.
|
||||
google.rpc.Status error = 10;
|
||||
}
|
||||
|
|
@ -0,0 +1,343 @@
|
|||
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.documentai.v1beta2;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
import "google/api/client.proto";
|
||||
import "google/api/field_behavior.proto";
|
||||
import "google/cloud/documentai/v1beta2/document.proto";
|
||||
import "google/cloud/documentai/v1beta2/geometry.proto";
|
||||
import "google/longrunning/operations.proto";
|
||||
import "google/protobuf/timestamp.proto";
|
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
|
||||
option java_multiple_files = true;
|
||||
option java_outer_classname = "DocumentAiProto";
|
||||
option java_package = "com.google.cloud.documentai.v1beta2";
|
||||
|
||||
// Service to parse structured information from unstructured or semi-structured
|
||||
// documents using state-of-the-art Google AI such as natural language,
|
||||
// computer vision, and translation.
|
||||
service DocumentUnderstandingService {
|
||||
option (google.api.default_host) = "us-documentai.googleapis.com";
|
||||
option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";
|
||||
|
||||
// LRO endpoint to batch process many documents. The output is written
|
||||
// to Cloud Storage as JSON in the [Document] format.
|
||||
rpc BatchProcessDocuments(BatchProcessDocumentsRequest) returns (google.longrunning.Operation) {
|
||||
option (google.api.http) = {
|
||||
post: "/v1beta2/{parent=projects/*/locations/*}/documents:batchProcess"
|
||||
body: "*"
|
||||
additional_bindings {
|
||||
post: "/v1beta2/{parent=projects/*}/documents:batchProcess"
|
||||
body: "*"
|
||||
}
|
||||
};
|
||||
option (google.api.method_signature) = "requests";
|
||||
option (google.longrunning.operation_info) = {
|
||||
response_type: "BatchProcessDocumentsResponse"
|
||||
metadata_type: "OperationMetadata"
|
||||
};
|
||||
}
|
||||
|
||||
// Processes a single document.
|
||||
rpc ProcessDocument(ProcessDocumentRequest) returns (Document) {
|
||||
option (google.api.http) = {
|
||||
post: "/v1beta2/{parent=projects/*/locations/*}/documents:process"
|
||||
body: "*"
|
||||
additional_bindings {
|
||||
post: "/v1beta2/{parent=projects/*}/documents:process"
|
||||
body: "*"
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Request to batch process documents as an asynchronous operation. The output
|
||||
// is written to Cloud Storage as JSON in the [Document] format.
|
||||
message BatchProcessDocumentsRequest {
|
||||
// Required. Individual requests for each document.
|
||||
repeated ProcessDocumentRequest requests = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Target project and location to make a call.
|
||||
//
|
||||
// Format: `projects/{project-id}/locations/{location-id}`.
|
||||
//
|
||||
// If no location is specified, a region will be chosen automatically.
|
||||
string parent = 2;
|
||||
}
|
||||
|
||||
// Request to process one document.
|
||||
message ProcessDocumentRequest {
|
||||
// Target project and location to make a call.
|
||||
//
|
||||
// Format: `projects/{project-id}/locations/{location-id}`.
|
||||
//
|
||||
// If no location is specified, a region will be chosen automatically.
|
||||
// This field is only populated when used in ProcessDocument method.
|
||||
string parent = 9;
|
||||
|
||||
// Required. Information about the input file.
|
||||
InputConfig input_config = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
|
||||
// Optional. The desired output location. This field is only needed in
|
||||
// BatchProcessDocumentsRequest.
|
||||
OutputConfig output_config = 2 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Specifies a known document type for deeper structure detection. Valid
|
||||
// values are currently "general" and "invoice". If not provided, "general"
|
||||
// is used as default. If any other value is given, the request is rejected.
|
||||
string document_type = 3;
|
||||
|
||||
// Controls table extraction behavior. If not specified, the system will
|
||||
// decide reasonable defaults.
|
||||
TableExtractionParams table_extraction_params = 4;
|
||||
|
||||
// Controls form extraction behavior. If not specified, the system will
|
||||
// decide reasonable defaults.
|
||||
FormExtractionParams form_extraction_params = 5;
|
||||
|
||||
// Controls entity extraction behavior. If not specified, the system will
|
||||
// decide reasonable defaults.
|
||||
EntityExtractionParams entity_extraction_params = 6;
|
||||
|
||||
// Controls OCR behavior. If not specified, the system will decide reasonable
|
||||
// defaults.
|
||||
OcrParams ocr_params = 7;
|
||||
|
||||
// Controls AutoML model prediction behavior. AutoMlParams cannot be used
|
||||
// together with other Params.
|
||||
AutoMlParams automl_params = 8;
|
||||
}
|
||||
|
||||
// Response to a batch document processing request. This is returned in
|
||||
// the LRO Operation after the operation is complete.
|
||||
message BatchProcessDocumentsResponse {
|
||||
// Responses for each individual document.
|
||||
repeated ProcessDocumentResponse responses = 1;
|
||||
}
|
||||
|
||||
// Response to a single document processing request.
|
||||
message ProcessDocumentResponse {
|
||||
// Information about the input file. This is the same as the corresponding
|
||||
// input config in the request.
|
||||
InputConfig input_config = 1;
|
||||
|
||||
// The output location of the parsed responses. The responses are written to
|
||||
// this location as JSON-serialized `Document` objects.
|
||||
OutputConfig output_config = 2;
|
||||
}
|
||||
|
||||
// Parameters to control Optical Character Recognition (OCR) behavior.
|
||||
message OcrParams {
|
||||
// List of languages to use for OCR. In most cases, an empty value
|
||||
// yields the best results since it enables automatic language detection. For
|
||||
// languages based on the Latin alphabet, setting `language_hints` is not
|
||||
// needed. In rare cases, when the language of the text in the image is known,
|
||||
// setting a hint will help get better results (although it will be a
|
||||
// significant hindrance if the hint is wrong). Document processing returns an
|
||||
// error if one or more of the specified languages is not one of the
|
||||
// supported languages.
|
||||
repeated string language_hints = 1;
|
||||
}
|
||||
|
||||
// Parameters to control table extraction behavior.
|
||||
message TableExtractionParams {
|
||||
// Whether to enable table extraction.
|
||||
bool enabled = 1;
|
||||
|
||||
// Optional. Table bounding box hints that can be provided for complex cases
|
||||
// in which our algorithm cannot locate the table(s).
|
||||
repeated TableBoundHint table_bound_hints = 2 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Optional. Table header hints. The extraction will bias towards producing
|
||||
// these terms as table headers, which may improve accuracy.
|
||||
repeated string header_hints = 3 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Model version of the table extraction system. Default is "builtin/stable".
|
||||
// Specify "builtin/latest" for the latest model.
|
||||
string model_version = 4;
|
||||
}
|
||||
|
||||
// A hint for a table bounding box on the page for table parsing.
|
||||
message TableBoundHint {
|
||||
// Optional. Page number for multi-paged inputs this hint applies to. If not
|
||||
// provided, this hint will apply to all pages by default. This value is
|
||||
// 1-based.
|
||||
int32 page_number = 1 [(google.api.field_behavior) = OPTIONAL];
|
||||
|
||||
// Bounding box hint for a table on this page. The coordinates must be
|
||||
// normalized to [0,1] and the bounding box must be an axis-aligned rectangle.
|
||||
BoundingPoly bounding_box = 2;
|
||||
}
|
||||
|
||||
// Parameters to control form extraction behavior.
|
||||
message FormExtractionParams {
|
||||
// Whether to enable form extraction.
|
||||
bool enabled = 1;
|
||||
|
||||
// User can provide pairs of (key text, value type) to improve the parsing
|
||||
// result.
|
||||
//
|
||||
// For example, if a document has a field called "Date" that holds a date
|
||||
// value and a field called "Amount" that may hold either a currency value
|
||||
// (e.g., "$500.00") or a simple number value (e.g., "20"), you could use the
|
||||
// following hints: [ {"key": "Date", "value_types": [ "DATE" ]}, {"key":
|
||||
// "Amount", "value_types": [ "PRICE", "NUMBER" ]} ]
|
||||
//
|
||||
// If the value type is unknown, but you want to provide hints for the keys,
|
||||
// you can leave the value_types field blank. e.g. {"key": "Date",
|
||||
// "value_types": []}
|
||||
repeated KeyValuePairHint key_value_pair_hints = 2;
|
||||
|
||||
// Model version of the form extraction system. Default is
|
||||
// "builtin/stable". Specify "builtin/latest" for the latest model.
|
||||
// For custom form models, specify: "custom/{model_name}". Model name
|
||||
// format is "bucket_name/path/to/modeldir" corresponding to
|
||||
// "gs://bucket_name/path/to/modeldir" where annotated examples are stored.
|
||||
string model_version = 3;
|
||||
}
|
||||
|
||||
// User-provided hint for key value pair.
|
||||
message KeyValuePairHint {
|
||||
// The key text for the hint.
|
||||
string key = 1;
|
||||
|
||||
// Type of the value. This is case-insensitive, and could be one of:
|
||||
// ADDRESS, LOCATION, ORGANIZATION, PERSON, PHONE_NUMBER,
|
||||
// ID, NUMBER, EMAIL, PRICE, TERMS, DATE, NAME. Types not in this list will
|
||||
// be ignored.
|
||||
repeated string value_types = 2;
|
||||
}
|
||||
|
||||
// Parameters to control entity extraction behavior.
|
||||
message EntityExtractionParams {
|
||||
// Whether to enable entity extraction.
|
||||
bool enabled = 1;
|
||||
|
||||
// Model version of the entity extraction. Default is
|
||||
// "builtin/stable". Specify "builtin/latest" for the latest model.
|
||||
string model_version = 2;
|
||||
}
|
||||
|
||||
// Parameters to control AutoML model prediction behavior.
|
||||
message AutoMlParams {
|
||||
// Resource name of the AutoML model.
|
||||
//
|
||||
// Format: `projects/{project-id}/locations/{location-id}/models/{model-id}`.
|
||||
string model = 1;
|
||||
}
|
||||
|
||||
// The desired input location and metadata.
|
||||
message InputConfig {
|
||||
// Required.
|
||||
oneof source {
|
||||
// The Google Cloud Storage location to read the input from. This must be a
|
||||
// single file.
|
||||
GcsSource gcs_source = 1;
|
||||
|
||||
// Content in bytes, represented as a stream of bytes.
|
||||
// Note: As with all `bytes` fields, proto buffer messages use a pure binary
|
||||
// representation, whereas JSON representations use base64.
|
||||
//
|
||||
// This field only works for synchronous ProcessDocument method.
|
||||
bytes contents = 3;
|
||||
}
|
||||
|
||||
// Required. Mimetype of the input. Current supported mimetypes are application/pdf,
|
||||
// image/tiff, and image/gif.
|
||||
// In addition, application/json type is supported for requests with
|
||||
// [ProcessDocumentRequest.automl_params][google.cloud.documentai.v1beta2.ProcessDocumentRequest.automl_params] field set. The JSON file needs to
|
||||
// be in [Document][google.cloud.documentai.v1beta2.Document] format.
|
||||
string mime_type = 2 [(google.api.field_behavior) = REQUIRED];
|
||||
}
|
||||
|
||||
// The desired output location and metadata.
|
||||
message OutputConfig {
|
||||
// Required.
|
||||
oneof destination {
|
||||
// The Google Cloud Storage location to write the output to.
|
||||
GcsDestination gcs_destination = 1;
|
||||
}
|
||||
|
||||
// The max number of pages to include into each output Document shard JSON on
|
||||
// Google Cloud Storage.
|
||||
//
|
||||
// The valid range is [1, 100]. If not specified, the default value is 20.
|
||||
//
|
||||
// For example, for one pdf file with 100 pages, 100 parsed pages will be
|
||||
// produced. If `pages_per_shard` = 20, then 5 Document shard JSON files each
|
||||
// containing 20 parsed pages will be written under the prefix
|
||||
// [OutputConfig.gcs_destination.uri][] and suffix pages-x-to-y.json where
|
||||
// x and y are 1-indexed page numbers.
|
||||
//
|
||||
// Example GCS outputs with 157 pages and pages_per_shard = 50:
|
||||
//
|
||||
// <prefix>pages-001-to-050.json
|
||||
// <prefix>pages-051-to-100.json
|
||||
// <prefix>pages-101-to-150.json
|
||||
// <prefix>pages-151-to-157.json
|
||||
int32 pages_per_shard = 2;
|
||||
}
|
||||
|
||||
// The Google Cloud Storage location where the input file will be read from.
|
||||
message GcsSource {
|
||||
// Required. Google Cloud Storage URI of the input file to read.
// NOTE(review): presumably of the form `gs://bucket_name/object_name`, as
// documented for the Document `uri` field -- confirm.
string uri = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
}
|
||||
|
||||
// The Google Cloud Storage location where the output file will be written to.
|
||||
message GcsDestination {
|
||||
// Required. Google Cloud Storage URI to write the output to. The
// `pages_per_shard` documentation on OutputConfig describes this URI as the
// prefix of the output shard file names.
string uri = 1 [(google.api.field_behavior) = REQUIRED];
|
||||
}
|
||||
|
||||
// Contains metadata for the BatchProcessDocuments operation.
|
||||
message OperationMetadata {
|
||||
// Possible states of the batch processing operation.
enum State {
|
||||
// The default value. This value is used if the state is omitted.
|
||||
STATE_UNSPECIFIED = 0;
|
||||
|
||||
// Request is received.
|
||||
ACCEPTED = 1;
|
||||
|
||||
// Request operation is waiting for scheduling.
|
||||
WAITING = 2;
|
||||
|
||||
// Request is being processed.
|
||||
RUNNING = 3;
|
||||
|
||||
// The batch processing completed successfully.
|
||||
SUCCEEDED = 4;
|
||||
|
||||
// The batch processing was cancelled.
|
||||
CANCELLED = 5;
|
||||
|
||||
// The batch processing has failed.
|
||||
FAILED = 6;
|
||||
}
|
||||
|
||||
// The state of the current batch processing.
|
||||
State state = 1;
|
||||
|
||||
// A message providing more details about the current state of processing.
|
||||
string state_message = 2;
|
||||
|
||||
// The creation time of the operation.
|
||||
google.protobuf.Timestamp create_time = 3;
|
||||
|
||||
// The last update time of the operation.
|
||||
google.protobuf.Timestamp update_time = 4;
|
||||
}
|
||||
|
|
@ -0,0 +1,118 @@
|
|||
type: com.google.api.codegen.ConfigProto
|
||||
config_schema_version: 1.0.0
|
||||
# The settings of generated code in a specific language.
|
||||
language_settings:
|
||||
java:
|
||||
package_name: com.google.cloud.documentai.v1beta2
|
||||
python:
|
||||
package_name: google.cloud.documentai_v1beta2.gapic
|
||||
go:
|
||||
package_name: cloud.google.com/go/documentai/apiv1beta2
|
||||
csharp:
|
||||
package_name: Google.Cloud.DocumentAi.V1beta2
|
||||
ruby:
|
||||
package_name: Google::Cloud::DocumentAi::V1beta2
|
||||
php:
|
||||
package_name: Google\Cloud\DocumentAi\V1beta2
|
||||
nodejs:
|
||||
package_name: documentai.v1beta2
|
||||
# A list of API interface configurations.
|
||||
interfaces:
|
||||
# The fully qualified name of the API interface.
|
||||
- name: google.cloud.documentai.v1beta2.DocumentUnderstandingService
|
||||
# A list of resource collection configurations.
|
||||
# Consists of a name_pattern and an entity_name.
|
||||
# The name_pattern is a pattern to describe the names of the resources of this
|
||||
# collection, using the platform's conventions for URI patterns. A generator
|
||||
# may use this to generate methods to compose and decompose such names. The
|
||||
# pattern should use named placeholders as in `shelves/{shelf}/books/{book}`;
|
||||
# those will be taken as hints for the parameter names of the generated
|
||||
# methods. If empty, no name methods are generated.
|
||||
# The entity_name is the name to be used as a basis for generated methods and
|
||||
# classes.
|
||||
collections: []
|
||||
# Definition for retryable codes.
|
||||
retry_codes_def:
|
||||
- name: idempotent
|
||||
retry_codes:
|
||||
- DEADLINE_EXCEEDED
|
||||
- UNAVAILABLE
|
||||
- name: non_idempotent
|
||||
retry_codes: []
|
||||
# Definition for retry/backoff parameters.
|
||||
retry_params_def:
|
||||
- name: default
|
||||
initial_retry_delay_millis: 100
|
||||
retry_delay_multiplier: 1.3
|
||||
max_retry_delay_millis: 60000
|
||||
initial_rpc_timeout_millis: 20000
|
||||
rpc_timeout_multiplier: 1
|
||||
max_rpc_timeout_millis: 20000
|
||||
total_timeout_millis: 600000
|
||||
# A list of method configurations.
|
||||
# Common properties:
|
||||
#
|
||||
# name - The simple name of the method.
|
||||
#
|
||||
# flattening - Specifies the configuration for parameter flattening.
|
||||
# Describes the parameter groups for which a generator should produce method
|
||||
# overloads which allow a client to directly pass request message fields as
|
||||
# method parameters. This information may or may not be used, depending on
|
||||
# the target language.
|
||||
# Consists of groups, which each represent a list of parameters to be
|
||||
# flattened. Each parameter listed must be a field of the request message.
|
||||
#
|
||||
# required_fields - Fields that are always required for a request to be
|
||||
# valid.
|
||||
#
|
||||
# page_streaming - Specifies the configuration for paging.
|
||||
# Describes information for generating a method which transforms a paging
|
||||
# list RPC into a stream of resources.
|
||||
# Consists of a request and a response.
|
||||
# The request specifies request information of the list method. It defines
|
||||
# which fields match the paging pattern in the request. The request consists
|
||||
# of a page_size_field and a token_field. The page_size_field is the name of
|
||||
# the optional field specifying the maximum number of elements to be
|
||||
# returned in the response. The token_field is the name of the field in the
|
||||
# request containing the page token.
|
||||
# The response specifies response information of the list method. It defines
|
||||
# which fields match the paging pattern in the response. The response
|
||||
# consists of a token_field and a resources_field. The token_field is the
|
||||
# name of the field in the response containing the next page token. The
|
||||
# resources_field is the name of the field in the response containing the
|
||||
# list of resources belonging to the page.
|
||||
#
|
||||
# retry_codes_name - Specifies the configuration for retryable codes. The
|
||||
# name must be defined in interfaces.retry_codes_def.
|
||||
#
|
||||
# retry_params_name - Specifies the configuration for retry/backoff
|
||||
# parameters. The name must be defined in interfaces.retry_params_def.
|
||||
#
|
||||
# field_name_patterns - Maps the field name of the request type to
|
||||
# entity_name of interfaces.collections.
|
||||
# Specifies the string pattern that the field must follow.
|
||||
#
|
||||
# timeout_millis - Specifies the default timeout for a non-retrying call. If
|
||||
# the call is retrying, refer to retry_params_name instead.
|
||||
methods:
|
||||
- name: BatchProcessDocuments
|
||||
flattening:
|
||||
groups:
|
||||
- parameters:
|
||||
- requests
|
||||
required_fields:
|
||||
- requests
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
long_running:
|
||||
return_type: google.cloud.documentai.v1beta2.BatchProcessDocumentsResponse
|
||||
metadata_type: google.cloud.documentai.v1beta2.OperationMetadata
|
||||
initial_poll_delay_millis: 20000
|
||||
poll_delay_multiplier: 1.5
|
||||
max_poll_delay_millis: 45000
|
||||
total_poll_timeout_millis: 86400000
|
||||
timeout_millis: 60000
|
||||
- name: ProcessDocument
|
||||
retry_codes_name: idempotent
|
||||
retry_params_name: default
|
||||
timeout_millis: 60000
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
type: google.api.Service
|
||||
config_version: 3
|
||||
name: documentai.googleapis.com
|
||||
title: Cloud Document AI API
|
||||
|
||||
apis:
|
||||
- name: google.cloud.documentai.v1beta2.DocumentUnderstandingService
|
||||
|
||||
types:
|
||||
- name: google.cloud.documentai.v1beta2.BatchProcessDocumentsResponse
|
||||
- name: google.cloud.documentai.v1beta2.Document
|
||||
- name: google.cloud.documentai.v1beta2.OperationMetadata
|
||||
|
||||
documentation:
|
||||
summary: |-
|
||||
Service to parse structured information from unstructured or
|
||||
semi-structured documents using state-of-the-art Google AI such as natural
|
||||
language, computer vision, translation, and AutoML.
|
||||
|
||||
authentication:
|
||||
rules:
|
||||
- selector: google.cloud.documentai.v1beta2.DocumentUnderstandingService.BatchProcessDocuments
|
||||
oauth:
|
||||
canonical_scopes: |-
|
||||
https://www.googleapis.com/auth/cloud-platform
|
||||
- selector: google.cloud.documentai.v1beta2.DocumentUnderstandingService.ProcessDocument
|
||||
oauth:
|
||||
canonical_scopes: |-
|
||||
https://www.googleapis.com/auth/cloud-platform
|
||||
- selector: 'google.longrunning.Operations.*'
|
||||
oauth:
|
||||
canonical_scopes: |-
|
||||
https://www.googleapis.com/auth/cloud-platform
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
{
|
||||
"methodConfig": [
|
||||
{
|
||||
"name": [
|
||||
{
|
||||
"service": "google.cloud.documentai.v1beta2.DocumentUnderstandingService",
|
||||
"method": "BatchProcessDocuments"
|
||||
},
|
||||
{
|
||||
"service": "google.cloud.documentai.v1beta2.DocumentUnderstandingService",
|
||||
"method": "ProcessDocument"
|
||||
}
|
||||
],
|
||||
"timeout": "60s",
|
||||
"retryPolicy": {
|
||||
"initialBackoff": "0.100s",
|
||||
"maxBackoff": "60s",
|
||||
"backoffMultiplier": 1.3,
|
||||
"retryableStatusCodes": [
|
||||
"DEADLINE_EXCEEDED",
|
||||
"UNAVAILABLE"
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
// Copyright 2020 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
syntax = "proto3";
|
||||
|
||||
package google.cloud.documentai.v1beta2;
|
||||
|
||||
import "google/api/annotations.proto";
|
||||
|
||||
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1beta2;documentai";
|
||||
option java_multiple_files = true;
|
||||
option java_outer_classname = "GeometryProto";
|
||||
option java_package = "com.google.cloud.documentai.v1beta2";
|
||||
|
||||
// A vertex represents a 2D point in the image.
|
||||
// NOTE: the vertex coordinates are in the same scale as the original image.
|
||||
message Vertex {
|
||||
// X coordinate.
|
||||
int32 x = 1;
|
||||
|
||||
// Y coordinate.
|
||||
int32 y = 2;
|
||||
}
|
||||
|
||||
// A vertex represents a 2D point in the image.
|
||||
// NOTE: the normalized vertex coordinates are relative to the original image
|
||||
// and range from 0 to 1.
|
||||
message NormalizedVertex {
|
||||
// X coordinate.
|
||||
float x = 1;
|
||||
|
||||
// Y coordinate.
|
||||
float y = 2;
|
||||
}
|
||||
|
||||
// A bounding polygon for the detected image annotation.
|
||||
message BoundingPoly {
|
||||
// The bounding polygon vertices.
|
||||
repeated Vertex vertices = 1;
|
||||
|
||||
// The bounding polygon normalized vertices.
|
||||
repeated NormalizedVertex normalized_vertices = 2;
|
||||
}
|
||||
Loading…
Reference in New Issue