From 4f34b95cc7deda8e141a50339a53fe8bb35d778d Mon Sep 17 00:00:00 2001 From: Google APIs Date: Mon, 21 Oct 2019 16:55:51 -0700 Subject: [PATCH] Synchronize new proto/yaml changes. PiperOrigin-RevId: 275954662 --- google/cloud/automl/automl_v1.yaml | 11 + .../cloud/automl/v1/annotation_payload.proto | 31 +- google/cloud/automl/v1/annotation_spec.proto | 46 + google/cloud/automl/v1/automl_gapic.yaml | 109 +++ google/cloud/automl/v1/classification.proto | 172 ++++ google/cloud/automl/v1/data_items.proto | 145 ++- google/cloud/automl/v1/dataset.proto | 20 +- google/cloud/automl/v1/detection.proto | 88 ++ google/cloud/automl/v1/geometry.proto | 48 + google/cloud/automl/v1/image.proto | 194 ++++ google/cloud/automl/v1/io.proto | 835 +++++++++++++++++- google/cloud/automl/v1/model.proto | 29 +- google/cloud/automl/v1/model_evaluation.proto | 36 +- google/cloud/automl/v1/operations.proto | 103 ++- .../cloud/automl/v1/prediction_service.proto | 128 ++- google/cloud/automl/v1/service.proto | 111 ++- google/cloud/automl/v1/text.proto | 60 ++ google/cloud/automl/v1/text_extraction.proto | 70 ++ google/cloud/automl/v1/text_segment.proto | 43 + google/cloud/automl/v1/text_sentiment.proto | 79 ++ google/cloud/automl/v1/translation.proto | 9 +- 21 files changed, 2320 insertions(+), 47 deletions(-) create mode 100644 google/cloud/automl/v1/annotation_spec.proto create mode 100644 google/cloud/automl/v1/classification.proto create mode 100644 google/cloud/automl/v1/detection.proto create mode 100644 google/cloud/automl/v1/geometry.proto create mode 100644 google/cloud/automl/v1/image.proto create mode 100644 google/cloud/automl/v1/text.proto create mode 100644 google/cloud/automl/v1/text_extraction.proto create mode 100644 google/cloud/automl/v1/text_segment.proto create mode 100644 google/cloud/automl/v1/text_sentiment.proto diff --git a/google/cloud/automl/automl_v1.yaml b/google/cloud/automl/automl_v1.yaml index cee25722..7f890c92 100644 --- a/google/cloud/automl/automl_v1.yaml 
+++ b/google/cloud/automl/automl_v1.yaml @@ -9,6 +9,7 @@ apis: types: - name: google.cloud.automl.v1.OperationMetadata +- name: google.cloud.automl.v1.BatchPredictResult documentation: summary: |- @@ -32,6 +33,8 @@ backend: deadline: 5.0 - selector: google.cloud.automl.v1.PredictionService.Predict deadline: 60.0 + - selector: google.cloud.automl.v1.PredictionService.BatchPredict + deadline: 20.0 - selector: google.cloud.automl.v1.AutoMl.CreateDataset deadline: 5.0 - selector: google.cloud.automl.v1.AutoMl.GetDataset @@ -46,6 +49,8 @@ backend: deadline: 20.0 - selector: google.cloud.automl.v1.AutoMl.ExportData deadline: 5.0 + - selector: google.cloud.automl.v1.AutoMl.GetAnnotationSpec + deadline: 5.0 - selector: google.cloud.automl.v1.AutoMl.CreateModel deadline: 20.0 - selector: google.cloud.automl.v1.AutoMl.GetModel @@ -56,6 +61,12 @@ backend: deadline: 50.0 - selector: google.cloud.automl.v1.AutoMl.DeleteModel deadline: 5.0 + - selector: google.cloud.automl.v1.AutoMl.DeployModel + deadline: 5.0 + - selector: google.cloud.automl.v1.AutoMl.UndeployModel + deadline: 5.0 + - selector: google.cloud.automl.v1.AutoMl.ExportModel + deadline: 5.0 - selector: google.cloud.automl.v1.AutoMl.GetModelEvaluation deadline: 5.0 - selector: google.cloud.automl.v1.AutoMl.ListModelEvaluations diff --git a/google/cloud/automl/v1/annotation_payload.proto b/google/cloud/automl/v1/annotation_payload.proto index 9469c261..0a049b30 100644 --- a/google/cloud/automl/v1/annotation_payload.proto +++ b/google/cloud/automl/v1/annotation_payload.proto @@ -17,12 +17,16 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/cloud/automl/v1/classification.proto"; +import "google/cloud/automl/v1/detection.proto"; +import "google/cloud/automl/v1/text_extraction.proto"; +import "google/cloud/automl/v1/text_sentiment.proto"; import "google/cloud/automl/v1/translation.proto"; import "google/protobuf/any.proto"; import "google/api/annotations.proto"; -option go_package = 
"google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; @@ -35,5 +39,30 @@ message AnnotationPayload { oneof detail { // Annotation details for translation. TranslationAnnotation translation = 2; + + // Annotation details for content or image classification. + ClassificationAnnotation classification = 3; + + // Annotation details for image object detection. + ImageObjectDetectionAnnotation image_object_detection = 4; + + // Annotation details for text extraction. + TextExtractionAnnotation text_extraction = 6; + + // Annotation details for text sentiment. + TextSentimentAnnotation text_sentiment = 7; } + + // Output only . The resource ID of the annotation spec that + // this annotation pertains to. The annotation spec comes from either an + // ancestor dataset, or the dataset that was used to train the model in use. + string annotation_spec_id = 1; + + // Output only. The value of + // [display_name][google.cloud.automl.v1p1beta.AnnotationSpec.display_name] + // when the model was trained. Because this field returns a value at model + // training time, for different models trained using the same dataset, the + // returned value could be different as model owner could update the + // `display_name` between any two model training. + string display_name = 5; } diff --git a/google/cloud/automl/v1/annotation_spec.proto b/google/cloud/automl/v1/annotation_spec.proto new file mode 100644 index 00000000..6cd8c796 --- /dev/null +++ b/google/cloud/automl/v1/annotation_spec.proto @@ -0,0 +1,46 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/api/annotations.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// A definition of an annotation spec. +message AnnotationSpec { + // Output only. Resource name of the annotation spec. + // Form: + // + // 'projects/{project_id}/locations/{location_id}/datasets/{dataset_id}/annotationSpecs/{annotation_spec_id}' + string name = 1; + + // Required. + // The name of the annotation spec to show in the interface. The name can be + // up to 32 characters long and must match the regexp `[a-zA-Z0-9_]+`. + // (_), and ASCII digits 0-9. + string display_name = 2; + + // Output only. The number of examples in the parent dataset + // labeled by the annotation spec. 
+ int32 example_count = 9; +} diff --git a/google/cloud/automl/v1/automl_gapic.yaml b/google/cloud/automl/v1/automl_gapic.yaml index 3d99419b..c104f055 100644 --- a/google/cloud/automl/v1/automl_gapic.yaml +++ b/google/cloud/automl/v1/automl_gapic.yaml @@ -36,6 +36,8 @@ interfaces: entity_name: location - name_pattern: projects/{project}/locations/{location}/datasets/{dataset} entity_name: dataset + - name_pattern: projects/{project}/locations/{location}/datasets/{dataset}/annotationSpecs/{annotation_spec} + entity_name: annotation_spec - name_pattern: projects/{project}/locations/{location}/models/{model} entity_name: model - name_pattern: projects/{project}/locations/{location}/models/{model}/modelEvaluations/{model_evaluation} @@ -237,6 +239,18 @@ interfaces: poll_delay_multiplier: 1.5 max_poll_delay_millis: 5000 total_poll_timeout_millis: 300000 + - name: GetAnnotationSpec + flattening: + groups: + - parameters: + - name + required_fields: + - name + retry_codes_name: idempotent + retry_params_name: default + field_name_patterns: + name: annotation_spec + timeout_millis: 5000 - name: CreateModel flattening: groups: @@ -327,6 +341,62 @@ interfaces: poll_delay_multiplier: 1.5 max_poll_delay_millis: 5000 total_poll_timeout_millis: 300000 + - name: DeployModel + flattening: + groups: + - parameters: + - name + required_fields: + - name + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: model + resource_name_treatment: STATIC_TYPES + timeout_millis: 5000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.automl.v1.OperationMetadata + initial_poll_delay_millis: 500 + poll_delay_multiplier: 1.5 + max_poll_delay_millis: 5000 + total_poll_timeout_millis: 3600000 + - name: UndeployModel + flattening: + groups: + - parameters: + - name + required_fields: + - name + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: model + resource_name_treatment: 
STATIC_TYPES + timeout_millis: 5000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.automl.v1.OperationMetadata + initial_poll_delay_millis: 500 + poll_delay_multiplier: 1.5 + max_poll_delay_millis: 5000 + total_poll_timeout_millis: 3600000 + - name: ExportModel + required_fields: + - name + - output_config + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: model + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.automl.v1.OperationMetadata + initial_poll_delay_millis: 500 + poll_delay_multiplier: 1.5 + max_poll_delay_millis: 5000 + total_poll_timeout_millis: 300000 + timeout_millis: 5000 - name: GetModelEvaluation flattening: groups: @@ -455,6 +525,30 @@ interfaces: name: model resource_name_treatment: STATIC_TYPES timeout_millis: 60000 + - name: BatchPredict + required_fields: + - name + - input_config + - output_config + flattening: + groups: + - parameters: + - name + - input_config + - output_config + - params + retry_codes_name: non_idempotent + retry_params_name: default + field_name_patterns: + name: model + long_running: + return_type: google.cloud.automl.v1.BatchPredictResult + metadata_type: google.cloud.automl.v1.OperationMetadata + initial_poll_delay_millis: 500 + poll_delay_multiplier: 1.5 + max_poll_delay_millis: 5000 + total_poll_timeout_millis: 86400000 + timeout_millis: 20000 resource_name_generation: - message_name: CreateDatasetRequest field_entity_map: @@ -477,6 +571,9 @@ resource_name_generation: - message_name: ExportDataRequest field_entity_map: name: dataset +- message_name: GetAnnotationSpecRequest + field_entity_map: + name: annotation_spec - message_name: CreateModelRequest field_entity_map: parent: location @@ -492,6 +589,15 @@ resource_name_generation: - message_name: DeleteModelRequest field_entity_map: name: model +- message_name: DeployModelRequest + field_entity_map: + name: model +- message_name: UndeployModelRequest + 
field_entity_map: + name: model +- message_name: ExportModelRequest + field_entity_map: + name: model - message_name: GetModelEvaluationRequest field_entity_map: name: model_evaluation @@ -501,3 +607,6 @@ resource_name_generation: - message_name: PredictRequest field_entity_map: name: model +- message_name: BatchPredictRequest + field_entity_map: + name: model diff --git a/google/cloud/automl/v1/classification.proto b/google/cloud/automl/v1/classification.proto new file mode 100644 index 00000000..75abeacb --- /dev/null +++ b/google/cloud/automl/v1/classification.proto @@ -0,0 +1,172 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/api/annotations.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_outer_classname = "ClassificationProto"; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// Type of the classification problem. +enum ClassificationType { + // An un-set value of this enum. + CLASSIFICATION_TYPE_UNSPECIFIED = 0; + + // At most one label is allowed per example. + MULTICLASS = 1; + + // Multiple labels are allowed for one example. 
+ MULTILABEL = 2; +} + +// Contains annotation details specific to classification. +message ClassificationAnnotation { + // Output only. A confidence estimate between 0.0 and 1.0. A higher value + // means greater confidence that the annotation is positive. If a user + // approves an annotation as negative or positive, the score value remains + // unchanged. If a user creates an annotation, the score is 0 for negative or + // 1 for positive. + float score = 1; +} + +// Model evaluation metrics for classification problems. +message ClassificationEvaluationMetrics { + // Metrics for a single confidence threshold. + message ConfidenceMetricsEntry { + // Output only. Metrics are computed with an assumption that the model + // never returns predictions with score lower than this value. + float confidence_threshold = 1; + + // Output only. Metrics are computed with an assumption that the model + // always returns at most this many predictions (ordered by their score, + // descendingly), but they all still need to meet the confidence_threshold. + int32 position_threshold = 14; + + // Output only. Recall (True Positive Rate) for the given confidence + // threshold. + float recall = 2; + + // Output only. Precision for the given confidence threshold. + float precision = 3; + + // Output only. False Positive Rate for the given confidence threshold. + float false_positive_rate = 8; + + // Output only. The harmonic mean of recall and precision. + float f1_score = 4; + + // Output only. The Recall (True Positive Rate) when only considering the + // label that has the highest prediction score and not below the confidence + // threshold for each example. + float recall_at1 = 5; + + // Output only. The precision when only considering the label that has the + // highest prediction score and not below the confidence threshold for each + // example. + float precision_at1 = 6; + + // Output only. 
The False Positive Rate when only considering the label that + // has the highest prediction score and not below the confidence threshold + // for each example. + float false_positive_rate_at1 = 9; + + // Output only. The harmonic mean of + // [recall_at1][google.cloud.automl.v1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.recall_at1] + // and + // [precision_at1][google.cloud.automl.v1.ClassificationEvaluationMetrics.ConfidenceMetricsEntry.precision_at1]. + float f1_score_at1 = 7; + + // Output only. The number of model created labels that match a ground truth + // label. + int64 true_positive_count = 10; + + // Output only. The number of model created labels that do not match a + // ground truth label. + int64 false_positive_count = 11; + + // Output only. The number of ground truth labels that are not matched + // by a model created label. + int64 false_negative_count = 12; + + // Output only. The number of labels that were not created by the model, + // but if they would, they would not match a ground truth label. + int64 true_negative_count = 13; + } + + // Confusion matrix of the model running the classification. + message ConfusionMatrix { + // Output only. A row in the confusion matrix. + message Row { + // Output only. Value of the specific cell in the confusion matrix. + // The number of values each row has (i.e. the length of the row) is equal + // to the length of the `annotation_spec_id` field or, if that one is not + // populated, length of the + // [display_name][google.cloud.automl.v1.ClassificationEvaluationMetrics.ConfusionMatrix.display_name] + // field. + repeated int32 example_count = 1; + } + + // Output only. IDs of the annotation specs used in the confusion matrix. + repeated string annotation_spec_id = 1; + + // Output only. Display name of the annotation specs used in the confusion + // matrix, as they were at the moment of the evaluation. + repeated string display_name = 3; + + // Output only. Rows in the confusion matrix. 
The number of rows is equal to + // the size of `annotation_spec_id`. + // `row[i].example_count[j]` is the number of examples that have ground + // truth of the `annotation_spec_id[i]` and are predicted as + // `annotation_spec_id[j]` by the model being evaluated. + repeated Row row = 2; + } + + // Output only. The Area Under Precision-Recall Curve metric. Micro-averaged + // for the overall evaluation. + float au_prc = 1; + + // Output only. The Area Under Receiver Operating Characteristic curve metric. + // Micro-averaged for the overall evaluation. + float au_roc = 6; + + // Output only. The Log Loss metric. + float log_loss = 7; + + // Output only. Metrics for each confidence_threshold in + // 0.00,0.05,0.10,...,0.95,0.96,0.97,0.98,0.99 and + // position_threshold = INT32_MAX_VALUE. + // ROC and precision-recall curves, and other aggregated metrics are derived + // from them. The confidence metrics entries may also be supplied for + // additional values of position_threshold, but from these no aggregated + // metrics are computed. + repeated ConfidenceMetricsEntry confidence_metrics_entry = 3; + + // Output only. Confusion matrix of the evaluation. + // Only set for MULTICLASS classification problems where number + // of labels is no more than 10. + // Only set for model level evaluation, not for evaluation per label. + ConfusionMatrix confusion_matrix = 4; + + // Output only. The annotation spec ids used for this evaluation. 
+ repeated string annotation_spec_id = 5; +} diff --git a/google/cloud/automl/v1/data_items.proto b/google/cloud/automl/v1/data_items.proto index d5337ac5..c481ec72 100644 --- a/google/cloud/automl/v1/data_items.proto +++ b/google/cloud/automl/v1/data_items.proto @@ -17,19 +17,42 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/cloud/automl/v1/geometry.proto"; import "google/cloud/automl/v1/io.proto"; +import "google/cloud/automl/v1/text_segment.proto"; import "google/protobuf/any.proto"; import "google/protobuf/duration.proto"; import "google/protobuf/struct.proto"; import "google/api/annotations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; option ruby_package = "Google::Cloud::AutoML::V1"; +// A representation of an image. +// Only images up to 30MB in size are supported. +message Image { + // Input only. The data representing the image. + // For Predict calls [image_bytes][google.cloud.automl.v1.Image.image_bytes] must be set, as other options are not + // currently supported by prediction API. You can read the contents of an + // uploaded image by using the [content_uri][google.cloud.automl.v1.Image.content_uri] field. + oneof data { + // Image content represented as a stream of bytes. + // Note: As with all `bytes` fields, protobuffers use a pure binary + // representation, whereas JSON representations use base64. + bytes image_bytes = 1; + + // An input config specifying the content of the image. + ImageInputConfig image_input_config = 7; + } + + // Output only. HTTP URI to the thumbnail image. + string thumbnail_uri = 4; +} + // A representation of a text snippet. message TextSnippet { // Required. 
The content of the text snippet as a string. Up to 250000 @@ -45,11 +68,131 @@ message TextSnippet { string content_uri = 4; } +// Message that describes dimension of a document. +message DocumentDimensions { + // Unit of the document dimension. + enum DocumentDimensionUnit { + // Should not be used. + DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0; + + // Document dimension is measured in inches. + INCH = 1; + + // Document dimension is measured in centimeters. + CENTIMETER = 2; + + // Document dimension is measured in points. 72 points = 1 inch. + POINT = 3; + } + + // Unit of the dimension. + DocumentDimensionUnit unit = 1; + + // Width value of the document, works together with the unit. + float width = 2; + + // Height value of the document, works together with the unit. + float height = 3; +} + +// A structured text document e.g. a PDF. +message Document { + // Describes the layout information of a [text_segment][google.cloud.automl.v1.Document.Layout.text_segment] in the document. + message Layout { + // The type of TextSegment in the context of the original document. + enum TextSegmentType { + // Should not be used. + TEXT_SEGMENT_TYPE_UNSPECIFIED = 0; + + // The text segment is a token. e.g. word. + TOKEN = 1; + + // The text segment is a paragraph. + PARAGRAPH = 2; + + // The text segment is a form field. + FORM_FIELD = 3; + + // The text segment is the name part of a form field. It will be treated + // as child of another FORM_FIELD TextSegment if its span is subspan of + // another TextSegment with type FORM_FIELD. + FORM_FIELD_NAME = 4; + + // The text segment is the text content part of a form field. It will be + // treated as child of another FORM_FIELD TextSegment if its span is + // subspan of another TextSegment with type FORM_FIELD. + FORM_FIELD_CONTENTS = 5; + + // The text segment is a whole table, including headers, and all rows. + TABLE = 6; + + // The text segment is a table's headers. 
It will be treated as child of + // another TABLE TextSegment if its span is subspan of another TextSegment + // with type TABLE. + TABLE_HEADER = 7; + + // The text segment is a row in table. It will be treated as child of + // another TABLE TextSegment if its span is subspan of another TextSegment + // with type TABLE. + TABLE_ROW = 8; + + // The text segment is a cell in table. It will be treated as child of + // another TABLE_ROW TextSegment if its span is subspan of another + // TextSegment with type TABLE_ROW. + TABLE_CELL = 9; + } + + // Text Segment that represents a segment in + // [document_text][google.cloud.automl.v1p1beta.Document.document_text]. + TextSegment text_segment = 1; + + // Page number of the [text_segment][google.cloud.automl.v1.Document.Layout.text_segment] in the original document, starts + // from 1. + int32 page_number = 2; + + // The position of the [text_segment][google.cloud.automl.v1.Document.Layout.text_segment] in the page. + // Contains exactly 4 + // + // [normalized_vertices][google.cloud.automl.v1p1beta.BoundingPoly.normalized_vertices] + // and they are connected by edges in the order provided, which will + // represent a rectangle parallel to the frame. The + // [NormalizedVertex-s][google.cloud.automl.v1p1beta.NormalizedVertex] are + // relative to the page. + // Coordinates are based on top-left as point (0,0). + BoundingPoly bounding_poly = 3; + + // The type of the [text_segment][google.cloud.automl.v1.Document.Layout.text_segment] in document. + TextSegmentType text_segment_type = 4; + } + + // An input config specifying the content of the document. + DocumentInputConfig input_config = 1; + + // The plain text version of this document. + TextSnippet document_text = 2; + + // Describes the layout of the document. + // Sorted by [page_number][]. + repeated Layout layout = 3; + + // The dimensions of the page in the document. + DocumentDimensions document_dimensions = 4; + + // Number of pages in the document. 
+ int32 page_count = 5; +} + // Example data used for training or prediction. message ExamplePayload { // Required. Input only. The example data. oneof payload { + // Example image. + Image image = 1; + // Example text. TextSnippet text_snippet = 2; + + // Example document. + Document document = 4; } } diff --git a/google/cloud/automl/v1/dataset.proto b/google/cloud/automl/v1/dataset.proto index 73b7d602..6e0961b3 100644 --- a/google/cloud/automl/v1/dataset.proto +++ b/google/cloud/automl/v1/dataset.proto @@ -17,12 +17,15 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/api/resource.proto"; +import "google/cloud/automl/v1/image.proto"; +import "google/cloud/automl/v1/text.proto"; import "google/cloud/automl/v1/translation.proto"; import "google/protobuf/timestamp.proto"; import "google/api/annotations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; @@ -36,6 +39,21 @@ message Dataset { oneof dataset_metadata { // Metadata for a dataset used for translation. TranslationDatasetMetadata translation_dataset_metadata = 23; + + // Metadata for a dataset used for image classification. + ImageClassificationDatasetMetadata image_classification_dataset_metadata = 24; + + // Metadata for a dataset used for text classification. + TextClassificationDatasetMetadata text_classification_dataset_metadata = 25; + + // Metadata for a dataset used for image object detection. + ImageObjectDetectionDatasetMetadata image_object_detection_dataset_metadata = 26; + + // Metadata for a dataset used for text extraction. + TextExtractionDatasetMetadata text_extraction_dataset_metadata = 28; + + // Metadata for a dataset used for text sentiment. 
+ TextSentimentDatasetMetadata text_sentiment_dataset_metadata = 30; } // Output only. The resource name of the dataset. diff --git a/google/cloud/automl/v1/detection.proto b/google/cloud/automl/v1/detection.proto new file mode 100644 index 00000000..d5559346 --- /dev/null +++ b/google/cloud/automl/v1/detection.proto @@ -0,0 +1,88 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/api/annotations.proto"; +import "google/cloud/automl/v1/geometry.proto"; +import "google/protobuf/duration.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// Annotation details for image object detection. +message ImageObjectDetectionAnnotation { + // Output only. The rectangle representing the object location. + BoundingPoly bounding_box = 1; + + // Output only. The confidence that this annotation is positive for the parent + // example, value in [0, 1], higher means higher positivity confidence. + float score = 2; +} + +// Bounding box matching model metrics for a single intersection-over-union +// threshold and multiple label match confidence thresholds. 
+message BoundingBoxMetricsEntry { + // Metrics for a single confidence threshold. + message ConfidenceMetricsEntry { + // Output only. The confidence threshold value used to compute the metrics. + float confidence_threshold = 1; + + // Output only. Recall under the given confidence threshold. + float recall = 2; + + // Output only. Precision under the given confidence threshold. + float precision = 3; + + // Output only. The harmonic mean of recall and precision. + float f1_score = 4; + } + + // Output only. The intersection-over-union threshold value used to compute + // this metrics entry. + float iou_threshold = 1; + + // Output only. The mean average precision, most often close to au_prc. + float mean_average_precision = 2; + + // Output only. Metrics for each label-match confidence_threshold from + // 0.05,0.10,...,0.95,0.96,0.97,0.98,0.99. Precision-recall curve is + // derived from them. + repeated ConfidenceMetricsEntry confidence_metrics_entries = 3; +} + +// Model evaluation metrics for image object detection problems. +// Evaluates prediction quality of labeled bounding boxes. +message ImageObjectDetectionEvaluationMetrics { + // Output only. The total number of bounding boxes (i.e. summed over all + // images) the ground truth used to create this evaluation had. + int32 evaluated_bounding_box_count = 1; + + // Output only. The bounding boxes match metrics for each + // Intersection-over-union threshold 0.05,0.10,...,0.95,0.96,0.97,0.98,0.99 + // and each label confidence threshold 0.05,0.10,...,0.95,0.96,0.97,0.98,0.99 + // pair. + repeated BoundingBoxMetricsEntry bounding_box_metrics_entries = 2; + + // Output only. The single metric for bounding boxes evaluation: + // the mean_average_precision averaged over all bounding_box_metrics_entries. 
+ float bounding_box_mean_average_precision = 3; +} diff --git a/google/cloud/automl/v1/geometry.proto b/google/cloud/automl/v1/geometry.proto new file mode 100644 index 00000000..99bf9134 --- /dev/null +++ b/google/cloud/automl/v1/geometry.proto @@ -0,0 +1,48 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/api/annotations.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// A vertex represents a 2D point in the image. +// The normalized vertex coordinates are between 0 to 1 fractions relative to +// the original plane (image, video). E.g. if the plane (e.g. whole image) would +// have size 10 x 20 then a point with normalized coordinates (0.1, 0.3) would +// be at the position (1, 6) on that plane. +message NormalizedVertex { + // Required. Horizontal coordinate. + float x = 1; + + // Required. Vertical coordinate. + float y = 2; +} + +// A bounding polygon of a detected object on a plane. +// On output both vertices and normalized_vertices are provided. +// The polygon is formed by connecting vertices in the order they are listed. 
+message BoundingPoly { + // Output only . The bounding polygon normalized vertices. + repeated NormalizedVertex normalized_vertices = 2; +} diff --git a/google/cloud/automl/v1/image.proto b/google/cloud/automl/v1/image.proto new file mode 100644 index 00000000..a6f27873 --- /dev/null +++ b/google/cloud/automl/v1/image.proto @@ -0,0 +1,194 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/api/annotations.proto"; +import "google/api/resource.proto"; +import "google/cloud/automl/v1/annotation_spec.proto"; +import "google/cloud/automl/v1/classification.proto"; +import "google/protobuf/timestamp.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_outer_classname = "ImageProto"; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// Dataset metadata that is specific to image classification. +message ImageClassificationDatasetMetadata { + // Required. Type of the classification problem. + ClassificationType classification_type = 1; +} + +// Dataset metadata specific to image object detection. +message ImageObjectDetectionDatasetMetadata {} + +// Model metadata for image classification. 
+message ImageClassificationModelMetadata { + // Optional. The ID of the `base` model. If it is specified, the new model + // will be created based on the `base` model. Otherwise, the new model will be + // created from scratch. The `base` model must be in the same + // `project` and `location` as the new model to create, and have the same + // `model_type`. + string base_model_id = 1; + + // The train budget of creating this model, expressed in milli node + // hours i.e. 1,000 value in this field means 1 node hour. The actual + // `train_cost` will be equal or less than this value. If further model + // training ceases to provide any improvements, it will stop without using + // full budget and the stop_reason will be `MODEL_CONVERGED`. + // Note, node_hour = actual_hour * number_of_nodes_invovled. + // For model type `cloud`(default), the train budget must be between 8,000 + // and 800,000 milli node hours, inclusive. The default value is 192, 000 + // which represents one day in wall time. For model type + // `mobile-low-latency-1`, `mobile-versatile-1`, `mobile-high-accuracy-1`, + // `mobile-core-ml-low-latency-1`, `mobile-core-ml-versatile-1`, + // `mobile-core-ml-high-accuracy-1`, the train budget must be between 1,000 + // and 100,000 milli node hours, inclusive. The default value is 24, 000 which + // represents one day in wall time. + int64 train_budget_milli_node_hours = 16; + + // Output only. The actual train cost of creating this model, expressed in + // milli node hours, i.e. 1,000 value in this field means 1 node hour. + // Guaranteed to not exceed the train budget. + int64 train_cost_milli_node_hours = 17; + + // Output only. The reason that this create model operation stopped, + // e.g. `BUDGET_REACHED`, `MODEL_CONVERGED`. + string stop_reason = 5; + + // Optional. Type of the model. The available values are: + // * `cloud` - Model to be used via prediction calls to AutoML API. + // This is the default value. 
+ // * `mobile-low-latency-1` - A model that, in addition to providing + // prediction via AutoML API, can also be exported (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile or edge device with TensorFlow + // afterwards. Expected to have low latency, but may have lower + // prediction quality than other models. + // * `mobile-versatile-1` - A model that, in addition to providing + // prediction via AutoML API, can also be exported (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile or edge device with TensorFlow + // afterwards. + // * `mobile-high-accuracy-1` - A model that, in addition to providing + // prediction via AutoML API, can also be exported (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile or edge device with TensorFlow + // afterwards. Expected to have a higher latency, but should + // also have a higher prediction quality than other models. + // * `mobile-core-ml-low-latency-1` - A model that, in addition to providing + // prediction via AutoML API, can also be exported (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile device with Core ML afterwards. Expected + // to have low latency, but may have lower prediction quality + // than other models. + // * `mobile-core-ml-versatile-1` - A model that, in addition to providing + // prediction via AutoML API, can also be exported (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile device with Core ML afterwards. + // * `mobile-core-ml-high-accuracy-1` - A model that, in addition to + // providing prediction via AutoML API, can also be exported + // (see + // [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]) + // and used on a mobile device with Core ML afterwards. 
Expected + // to have a higher latency, but should also have a higher + // prediction quality than other models. + string model_type = 7; + + // Output only. An approximate number of online prediction QPS that can + // be supported by this model per each node on which it is deployed. + double node_qps = 13; + + // Output only. The number of nodes this model is deployed on. A node is an + // abstraction of a machine resource, which can handle online prediction QPS + // as given in the node_qps field. + int64 node_count = 14; +} + +// Model metadata specific to image object detection. +message ImageObjectDetectionModelMetadata { + // Optional. Type of the model. The available values are: + // * `cloud-high-accuracy-1` - (default) A model to be used via prediction + // calls to AutoML API. Expected to have a higher latency, but + // should also have a higher prediction quality than other + // models. + // * `cloud-low-latency-1` - A model to be used via prediction + // calls to AutoML API. Expected to have low latency, but may + // have lower prediction quality than other models. + string model_type = 1; + + // Output only. The number of nodes this model is deployed on. A node is an + // abstraction of a machine resource, which can handle online prediction QPS + // as given in the qps_per_node field. + int64 node_count = 3; + + // Output only. An approximate number of online prediction QPS that can + // be supported by this model per each node on which it is deployed. + double node_qps = 4; + + // Output only. The reason that this create model operation stopped, + // e.g. `BUDGET_REACHED`, `MODEL_CONVERGED`. + string stop_reason = 5; + + // The train budget of creating this model, expressed in milli node + // hours i.e. 1,000 value in this field means 1 node hour. The actual + // `train_cost` will be equal or less than this value. 
If further model + // training ceases to provide any improvements, it will stop without using + // full budget and the stop_reason will be `MODEL_CONVERGED`. + // Note, node_hour = actual_hour * number_of_nodes_invovled. + // For model type `cloud-high-accuracy-1`(default) and `cloud-low-latency-1`, + // the train budget must be between 20,000 and 900,000 milli node hours, + // inclusive. The default value is 216, 000 which represents one day in + // wall time. + // For model type `mobile-low-latency-1`, `mobile-versatile-1`, + // `mobile-high-accuracy-1`, `mobile-core-ml-low-latency-1`, + // `mobile-core-ml-versatile-1`, `mobile-core-ml-high-accuracy-1`, the train + // budget must be between 1,000 and 100,000 milli node hours, inclusive. + // The default value is 24, 000 which represents one day in wall time. + int64 train_budget_milli_node_hours = 6; + + // Output only. The actual train cost of creating this model, expressed in + // milli node hours, i.e. 1,000 value in this field means 1 node hour. + // Guaranteed to not exceed the train budget. + int64 train_cost_milli_node_hours = 7; +} + +// Model deployment metadata specific to Image Classification. +message ImageClassificationModelDeploymentMetadata { + // Input only. The number of nodes to deploy the model on. A node is an + // abstraction of a machine resource, which can handle online prediction QPS + // as given in the model's + // + // [node_qps][google.cloud.automl.v1.ImageClassificationModelMetadata.node_qps]. + // Must be between 1 and 100, inclusive on both ends. + int64 node_count = 1; +} + +// Model deployment metadata specific to Image Object Detection. +message ImageObjectDetectionModelDeploymentMetadata { + // Input only. The number of nodes to deploy the model on. A node is an + // abstraction of a machine resource, which can handle online prediction QPS + // as given in the model's + // + // [qps_per_node][google.cloud.automl.v1.ImageObjectDetectionModelMetadata.qps_per_node]. 
+ // Must be between 1 and 100, inclusive on both ends. + int64 node_count = 1; +} diff --git a/google/cloud/automl/v1/io.proto b/google/cloud/automl/v1/io.proto index fd835d25..5c00df6e 100644 --- a/google/cloud/automl/v1/io.proto +++ b/google/cloud/automl/v1/io.proto @@ -18,28 +18,451 @@ syntax = "proto3"; package google.cloud.automl.v1; import "google/api/annotations.proto"; +import "google/api/field_behavior.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; option ruby_package = "Google::Cloud::AutoML::V1"; -// Input configuration for ImportData Action. +// Input configuration for +// [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] action. // // The format of input depends on dataset_metadata the Dataset into which // the import is happening has. As input source the -// [gcs_source][google.cloud.automl.v1beta1.InputConfig.gcs_source] +// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] // is expected, unless specified otherwise. Additionally any input .CSV file // by itself must be 100MB or smaller, unless specified otherwise. // If an "example" file (that is, image, video etc.) with identical content -// (even if it had different GCS_FILE_PATH) is mentioned multiple times, then +// (even if it had different `GCS_FILE_PATH`) is mentioned multiple times, then // its label, bounding boxes etc. are appended. The same file should be always -// provided with the same ML_USE and GCS_FILE_PATH, if it is not, then +// provided with the same `ML_USE` and `GCS_FILE_PATH`, if it is not, then // these values are nondeterministically selected from the given ones. 
// -// Errors: +// The formats are represented in EBNF with commas being literal and with +// non-terminal symbols defined near the end of this comment. The formats are: +// +//

AutoML Vision

+// +// +//
Classification
+// +// See [Preparing your training +// data](https://cloud.google.com/vision/automl/docs/prepare) for more +// information. +// +// CSV file(s) with each line in format: +// +// ML_USE,GCS_FILE_PATH,LABEL,LABEL,... +// +// * `ML_USE` - Identifies the data set that the current row (file) applies +// to. +// This value can be one of the following: +// * `TRAIN` - Rows in this file are used to train the model. +// * `TEST` - Rows in this file are used to test the model during training. +// * `UNASSIGNED` - Rows in this file are not categorized. They are +// Automatically divided into train and test data. 80% for training and +// 20% for testing. +// +// * `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to +// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP, +// .TIFF, .ICO. +// +// * `LABEL` - A label that identifies the object in the image. +// +// For the `MULTICLASS` classification type, at most one `LABEL` is allowed +// per image. If an image has not yet been labeled, then it should be +// mentioned just once with no `LABEL`. +// +// Some sample rows: +// +// TRAIN,gs://folder/image1.jpg,daisy +// TEST,gs://folder/image2.jpg,dandelion,tulip,rose +// UNASSIGNED,gs://folder/image3.jpg,daisy +// UNASSIGNED,gs://folder/image4.jpg +// +// +//
Object Detection
+// See [Preparing your training +// data](https://cloud.google.com/vision/automl/object-detection/docs/prepare) +// for more information. +// +// A CSV file(s) with each line in format: +// +// ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,) +// +// * `ML_USE` - Identifies the data set that the current row (file) applies +// to. +// This value can be one of the following: +// * `TRAIN` - Rows in this file are used to train the model. +// * `TEST` - Rows in this file are used to test the model during training. +// * `UNASSIGNED` - Rows in this file are not categorized. They are +// Automatically divided into train and test data. 80% for training and +// 20% for testing. +// +// * `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to +// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image +// is assumed to be exhaustively labeled. +// +// * `LABEL` - A label that identifies the object in the image specified by the +// `BOUNDING_BOX`. +// +// * `BOUNDING BOX` - The vertices of an object in the example image. +// The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more than +// 500 `BOUNDING_BOX` instances per image are allowed (one `BOUNDING_BOX` +// per line). If an image has no looked for objects then it should be +// mentioned just once with no LABEL and the ",,,,,,," in place of the +// `BOUNDING_BOX`. +// +// **Four sample rows:** +// +// TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,, +// TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,, +// UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3 +// TEST,gs://folder/im3.png,,,,,,,,, +//
+//
+// +// +//

AutoML Natural Language

+// +// +//
Entity Extraction
+// +// See [Preparing your training +// data](/natural-language/automl/entity-analysis/docs/prepare) for more +// information. +// +// One or more CSV file(s) with each line in the following format: +// +// ML_USE,GCS_FILE_PATH +// +// * `ML_USE` - Identifies the data set that the current row (file) applies +// to. +// This value can be one of the following: +// * `TRAIN` - Rows in this file are used to train the model. +// * `TEST` - Rows in this file are used to test the model during training. +// * `UNASSIGNED` - Rows in this file are not categorized. They are +// Automatically divided into train and test data. 80% for training and +// 20% for testing.. +// +// * `GCS_FILE_PATH` - a Identifies JSON Lines (.JSONL) file stored in +// Google Cloud Storage that contains in-line text in-line as documents +// for model training. +// +// After the training data set has been determined from the `TRAIN` and +// `UNASSIGNED` CSV files, the training data is divided into train and +// validation data sets. 70% for training and 30% for validation. +// +// For example: +// +// TRAIN,gs://folder/file1.jsonl +// VALIDATE,gs://folder/file2.jsonl +// TEST,gs://folder/file3.jsonl +// +// **In-line JSONL files** +// +// In-line .JSONL files contain, per line, a JSON document that wraps a +// [`text_snippet`][google.cloud.automl.v1.TextSnippet] field followed by +// one or more [`annotations`][google.cloud.automl.v1.AnnotationPayload] +// fields, which have `display_name` and `text_extraction` fields to describe +// the entity from the text snippet. Multiple JSON documents can be separated +// using line breaks (\n). +// +// The supplied text must be annotated exhaustively. For example, if you +// include the text "horse", but do not label it as "animal", +// then "horse" is assumed to not be an "animal". +// +// Any given text snippet content must have 30,000 characters or +// less, and also be UTF-8 NFC encoded. ASCII is accepted as it is +// UTF-8 NFC encoded. 
+// +// For example: +// +// { +// "text_snippet": { +// "content": "dog car cat" +// }, +// "annotations": [ +// { +// "display_name": "animal", +// "text_extraction": { +// "text_segment": {"start_offset": 0, "end_offset": 2} +// } +// }, +// { +// "display_name": "vehicle", +// "text_extraction": { +// "text_segment": {"start_offset": 4, "end_offset": 6} +// } +// }, +// { +// "display_name": "animal", +// "text_extraction": { +// "text_segment": {"start_offset": 8, "end_offset": 10} +// } +// } +// ] +// }\n +// { +// "text_snippet": { +// "content": "This dog is good." +// }, +// "annotations": [ +// { +// "display_name": "animal", +// "text_extraction": { +// "text_segment": {"start_offset": 5, "end_offset": 7} +// } +// } +// ] +// } +// +// **JSONL files that reference documents** +// +// .JSONL files contain, per line, a JSON document that wraps a +// `input_config` that contains the path to a source PDF document. +// Multiple JSON documents can be separated using line breaks (\n). +// +// For example: +// +// { +// "document": { +// "input_config": { +// "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] +// } +// } +// } +// }\n +// { +// "document": { +// "input_config": { +// "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ] +// } +// } +// } +// } +// +// **In-line JSONL files with PDF layout information** +// +// **Note:** You can only annotate PDF files using the UI. The format described +// below applies to annotated PDF files exported using the UI or `exportData`. +// +// In-line .JSONL files for PDF documents contain, per line, a JSON document +// that wraps a `document` field that provides the textual content of the PDF +// document and the layout information. 
+// +// For example: +// +// { +// "document": { +// "document_text": { +// "content": "dog car cat" +// } +// "layout": [ +// { +// "text_segment": { +// "start_offset": 0, +// "end_offset": 11, +// }, +// "page_number": 1, +// "bounding_poly": { +// "normalized_vertices": [ +// {"x": 0.1, "y": 0.1}, +// {"x": 0.1, "y": 0.3}, +// {"x": 0.3, "y": 0.3}, +// {"x": 0.3, "y": 0.1}, +// ], +// }, +// "text_segment_type": TOKEN, +// } +// ], +// "document_dimensions": { +// "width": 8.27, +// "height": 11.69, +// "unit": INCH, +// } +// "page_count": 3, +// }, +// "annotations": [ +// { +// "display_name": "animal", +// "text_extraction": { +// "text_segment": {"start_offset": 0, "end_offset": 3} +// } +// }, +// { +// "display_name": "vehicle", +// "text_extraction": { +// "text_segment": {"start_offset": 4, "end_offset": 7} +// } +// }, +// { +// "display_name": "animal", +// "text_extraction": { +// "text_segment": {"start_offset": 8, "end_offset": 11} +// } +// }, +// ], +// +// +// +// +//
Classification
+// +// See [Preparing your training +// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more +// information. +// +// One or more CSV file(s) with each line in the following format: +// +// ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,... +// +// * `ML_USE` - Identifies the data set that the current row (file) applies +// to. +// This value can be one of the following: +// * `TRAIN` - Rows in this file are used to train the model. +// * `TEST` - Rows in this file are used to test the model during training. +// * `UNASSIGNED` - Rows in this file are not categorized. They are +// Automatically divided into train and test data. 80% for training and +// 20% for testing. +// +// * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If +// the column content is a valid Google Cloud Storage file path, that is, +// prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if +// the content is enclosed in double quotes (""), it is treated as a +// `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a +// file with supported extension and UTF-8 encoding, for example, +// "gs://folder/content.txt" AutoML imports the file content +// as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content +// excluding quotes. In both cases, size of the content must be 10MB or +// less in size. For zip files, the size of each file inside the zip must be +// 10MB or less in size. +// +// For the `MULTICLASS` classification type, at most one `LABEL` is allowed. +// The `ML_USE` and `LABEL` columns are optional. +// Supported file extensions: .TXT, .PDF, .ZIP +// +// A maximum of 100 unique labels are allowed per CSV row. +// +// Sample rows: +// +// TRAIN,"They have bad food and very rude",RudeService,BadFood +// gs://folder/content.txt,SlowService +// TEST,gs://folder/document.pdf +// VALIDATE,gs://folder/text_files.zip,BadFood +// +// +// +//
Sentiment Analysis
+// +// See [Preparing your training +// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more +// information. +// +// CSV file(s) with each line in format: +// +// ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT +// +// * `ML_USE` - Identifies the data set that the current row (file) applies +// to. +// This value can be one of the following: +// * `TRAIN` - Rows in this file are used to train the model. +// * `TEST` - Rows in this file are used to test the model during training. +// * `UNASSIGNED` - Rows in this file are not categorized. They are +// Automatically divided into train and test data. 80% for training and +// 20% for testing. +// +// * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If +// the column content is a valid Google Cloud Storage file path, that is, +// prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if +// the content is enclosed in double quotes (""), it is treated as a +// `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a +// file with supported extension and UTF-8 encoding, for example, +// "gs://folder/content.txt" AutoML imports the file content +// as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content +// excluding quotes. In both cases, size of the content must be 128kB or +// less in size. For zip files, the size of each file inside the zip must be +// 128kB or less in size. +// +// The `ML_USE` and `SENTIMENT` columns are optional. +// Supported file extensions: .TXT, .PDF, .ZIP +// +// * `SENTIMENT` - An integer between 0 and +// Dataset.text_sentiment_dataset_metadata.sentiment_max +// (inclusive). Describes the ordinal of the sentiment - higher +// value means a more positive sentiment. All the values are +// completely relative, i.e. 
neither 0 needs to mean a negative or +// neutral sentiment nor sentiment_max needs to mean a positive one - +// it is just required that 0 is the least positive sentiment +// in the data, and sentiment_max is the most positive one. +// The SENTIMENT shouldn't be confused with "score" or "magnitude" +// from the previous Natural Language Sentiment Analysis API. +// All SENTIMENT values between 0 and sentiment_max must be +// represented in the imported data. On prediction the same 0 to +// sentiment_max range will be used. The difference between +// neighboring sentiment values needs not to be uniform, e.g. 1 and +// 2 may be similar whereas the difference between 2 and 3 may be +// large. +// +// Sample rows: +// +// TRAIN,"@freewrytin this is way too good for your product",2 +// gs://folder/content.txt,3 +// TEST,gs://folder/document.pdf +// VALIDATE,gs://folder/text_files.zip,2 +//
+//
+// +// +// **Input field definitions:** +// +// `ML_USE` +// : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED") +// Describes how the given example (file) should be used for model +// training. "UNASSIGNED" can be used when user has no preference. +// +// `GCS_FILE_PATH` +// : The path to a file on Google Cloud Storage. For example, +// "gs://folder/image1.png". +// +// `LABEL` +// : A display name of an object on an image, video etc., e.g. "dog". +// Must be up to 32 characters long and can consist only of ASCII +// Latin letters A-Z and a-z, underscores(_), and ASCII digits 0-9. +// For each label an AnnotationSpec is created which display_name +// becomes the label; AnnotationSpecs are given back in predictions. +// +// `BOUNDING_BOX` +// : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`) +// A rectangle parallel to the frame of the example (image, +// video). If 4 vertices are given they are connected by edges +// in the order provided, if 2 are given they are recognized +// as diagonally opposite vertices of the rectangle. +// +// `VERTEX` +// : (`COORDINATE,COORDINATE`) +// First coordinate is horizontal (x), the second is vertical (y). +// +// `COORDINATE` +// : A float in 0 to 1 range, relative to total length of +// image or video in given dimension. For fractions the +// leading non-decimal 0 can be omitted (i.e. 0.3 = .3). +// Point 0,0 is in top left. +// +// `TEXT_SNIPPET` +// : The content of a text snippet, UTF-8 encoded, enclosed within +// double quotes (""). +// +// `DOCUMENT` +// : A field that provides the textual content with document and the layout +// information. +// +// +// **Errors:** +// // If any of the provided CSV files can't be parsed or if more than certain // percent of CSV rows cannot be processed then the operation fails and // nothing is imported. Regardless of overall success or failure the per-row @@ -50,8 +473,9 @@ message InputConfig { // The source of the input. 
oneof source { // The Google Cloud Storage location for the input content. - // In ImportData, the gcs_source points to a csv with structure described in - // the comment. + // For [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData], + // `gcs_source` points to a CSV file with a structure described in + // [InputConfig][google.cloud.automl.v1.InputConfig]. GcsSource gcs_source = 1; } @@ -61,40 +485,399 @@ message InputConfig { map params = 2; } -// * For Translation: +// Input configuration for BatchPredict Action. +// +// The format of input depends on the ML problem of the model used for +// prediction. As input source the +// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] +// is expected, unless specified otherwise. +// +// The formats are represented in EBNF with commas being literal and with +// non-terminal symbols defined near the end of this comment. The formats +// are: +// +//

AutoML Natural Language

+//
Classification
+// +// One or more CSV files where each line is a single column: +// +// GCS_FILE_PATH +// +// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file. +// Supported file extensions: .TXT, .PDF +// Text files can be no larger than 10MB in size. +// +// Sample rows: +// +// gs://folder/text1.txt +// gs://folder/text2.pdf +// +//
Sentiment Analysis
+// One or more CSV files where each line is a single column: +// +// GCS_FILE_PATH +// +// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file. +// Supported file extensions: .TXT, .PDF +// Text files can be no larger than 128kB in size. +// +// Sample rows: +// +// gs://folder/text1.txt +// gs://folder/text2.pdf +// +//
Entity Extraction
+// +// One or more JSONL (JSON Lines) files that either provide inline text or +// documents. You can only use one format, either inline text or documents, +// for a single call to [AutoMl.BatchPredict]. +// +// Each JSONL file contains a per line a proto that +// wraps a temporary user-assigned TextSnippet ID (string up to 2000 +// characters long) called "id", a TextSnippet proto (in +// JSON representation) and zero or more TextFeature protos. Any given +// text snippet content must have 30,000 characters or less, and also +// be UTF-8 NFC encoded (ASCII already is). The IDs provided should be +// unique. +// +// Each document JSONL file contains, per line, a proto that wraps a +// Document proto with `input_config` set. Only PDF documents are +// currently supported, and each PDF document cannot exceed 2MB in size. +// +// Each JSONL file must not exceed 100MB in size, and no more than 20 +// JSONL files may be passed. +// +// Sample inline JSONL file (Shown with artificial line +// breaks. Actual line breaks are denoted by "\n".): +// +// { +// "id": "my_first_id", +// "text_snippet": { "content": "dog car cat"}, +// "text_features": [ +// { +// "text_segment": {"start_offset": 4, "end_offset": 6}, +// "structural_type": PARAGRAPH, +// "bounding_poly": { +// "normalized_vertices": [ +// {"x": 0.1, "y": 0.1}, +// {"x": 0.1, "y": 0.3}, +// {"x": 0.3, "y": 0.3}, +// {"x": 0.3, "y": 0.1}, +// ] +// }, +// } +// ], +// }\n +// { +// "id": "2", +// "text_snippet": { +// "content": "Extended sample content", +// "mime_type": "text/plain" +// } +// } +// +// Sample document JSONL file (Shown with artificial line +// breaks. Actual line breaks are denoted by "\n".): +// +// { +// "document": { +// "input_config": { +// "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] +// } +// } +// } +// }\n +// { +// "document": { +// "input_config": { +// "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ] +// } +// } +// } +// } +//
+//
+// +// **Input field definitions:** +// +// `GCS_FILE_PATH` +// : The path to a file on Google Cloud Storage. For example, +// "gs://folder/video.avi". +// +// **Errors:** +// +// If any of the provided CSV files can't be parsed or if more than certain +// percent of CSV rows cannot be processed then the operation fails and +// prediction does not happen. Regardless of overall success or failure the +// per-row failures, up to a certain count cap, will be listed in +// Operation.metadata.partial_failures. +message BatchPredictInputConfig { + // The source of the input. + oneof source { + // Required. The Google Cloud Storage location for the input content. + GcsSource gcs_source = 1 [(google.api.field_behavior) = REQUIRED]; + } +} + +// Input configuration of an [Document][google.cloud.automl.v1.Image]. +message ImageInputConfig { + // The Google Cloud Storage location of the document file. Only a single path + // should be given. + GcsSource gcs_source = 1; +} + +// Input configuration of a [Document][google.cloud.automl.v1.Document]. +message DocumentInputConfig { + // The Google Cloud Storage location of the document file. Only a single path + // should be given. + // + // Max supported size: 512MB. + // + // Supported extensions: .PDF. + GcsSource gcs_source = 1; +} + +// Output configuration for ExportData. +// +// As destination the +// [gcs_destination][google.cloud.automl.v1.OutputConfig.gcs_destination] +// must be set unless specified otherwise for a domain. If gcs_destination is +// set then in the given directory a new directory is created. Its name +// will be "export_data--", +// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. +// Only ground truth annotations are exported (not approved annotations are +// not exported). +// +// The outputs correspond to how the data was imported, and may be used as +// input to import data. 
The output formats are represented as EBNF with literal +// commas and same non-terminal symbols definitions are these in import data's +// [InputConfig][google.cloud.automl.v1.InputConfig]: +// +// * For Image Classification: +// CSV file(s) `image_classification_1.csv`, +// `image_classification_2.csv`,...,`image_classification_N.csv`with +// each line in format: +// ML_USE,GCS_FILE_PATH,LABEL,LABEL,... +// where GCS_FILE_PATHs point at the original, source locations of the +// imported images. +// For MULTICLASS classification type, there can be at most one LABEL +// per example. +// +// * For Image Object Detection: +// CSV file(s) `image_object_detection_1.csv`, +// `image_object_detection_2.csv`,...,`image_object_detection_N.csv` +// with each line in format: +// ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,) +// where GCS_FILE_PATHs point at the original, source locations of the +// imported images. +// +// * For Text Classification: +// In the created directory CSV file(s) `text_classification_1.csv`, +// `text_classification_2.csv`, ...,`text_classification_N.csv` will be +// created where N depends on the total number of examples exported. +// Each line in the CSV is of the format: +// ML_USE,GCS_FILE_PATH,LABEL,LABEL,... +// where GCS_FILE_PATHs point at the exported .txt files containing +// the text content of the imported example. For MULTICLASS +// classification type, there will be at most one LABEL per example. +// +// * For Text Sentiment: +// In the created directory CSV file(s) `text_sentiment_1.csv`, +// `text_sentiment_2.csv`, ...,`text_sentiment_N.csv` will be +// created where N depends on the total number of examples exported. +// Each line in the CSV is of the format: +// ML_USE,GCS_FILE_PATH,SENTIMENT +// where GCS_FILE_PATHs point at the exported .txt files containing +// the text content of the imported example. 
+// +// * For Text Extraction: +// CSV file `text_extraction.csv`, with each line in format: +// ML_USE,GCS_FILE_PATH +// GCS_FILE_PATH leads to a .JSONL (i.e. JSON Lines) file which +// contains, per line, a proto that wraps a TextSnippet proto (in json +// representation) followed by AnnotationPayload protos (called +// annotations). If initially documents had been imported, the JSONL +// will point at the original, source locations of the imported +// documents. +// +// * For Translation: // CSV file `translation.csv`, with each line in format: // ML_USE,GCS_FILE_PATH // GCS_FILE_PATH leads to a .TSV file which describes examples that have // given ML_USE, using the following row format per line: // TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target // language) -// -// `export_data__` -// where will be made -// BigQuery-dataset-name compatible (e.g. most special characters will -// become underscores), and timestamp will be in -// YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In that -// dataset a new table called `primary_table` will be created, and -// filled with precisely the same data as this obtained on import. message OutputConfig { - // Required. The destination of the output. + // The destination of the output. oneof destination { - // The Google Cloud Storage location where the output is to be written to. - // For Image Object Detection, Text Extraction, Video Classification and - // Tables, in the given directory a new directory will be created with name: + // Required. The Google Cloud Storage location where the output is to be + // written to. For Image Object Detection, Text Extraction in the given + // directory a new directory will be created with name: // export_data-- where // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export // output will be written into that directory. 
- GcsDestination gcs_destination = 1; + GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED]; } } +// Output configuration for BatchPredict Action. +// +// As destination the +// +// [gcs_destination][google.cloud.automl.v1.BatchPredictOutputConfig.gcs_destination] +// must be set unless specified otherwise for a domain. If gcs_destination is +// set then in the given directory a new directory is created. Its name +// will be +// "prediction--", +// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. The contents +// of it depends on the ML problem the predictions are made for. +// +// * For Text Classification: +// In the created directory files `text_classification_1.jsonl`, +// `text_classification_2.jsonl`,...,`text_classification_N.jsonl` +// will be created, where N may be 1, and depends on the +// total number of inputs and annotations found. +// +// Each .JSONL file will contain, per line, a JSON representation of a +// proto that wraps input text (or pdf) file in +// the text snippet (or document) proto and a list of +// zero or more AnnotationPayload protos (called annotations), which +// have classification detail populated. A single text (or pdf) file +// will be listed only once with all its annotations, and its +// annotations will never be split across files. +// +// If prediction for any text (or pdf) file failed (partially or +// completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,..., +// `errors_N.jsonl` files will be created (N depends on total number of +// failed predictions). These files will have a JSON representation of a +// proto that wraps input text (or pdf) file followed by exactly one +// +// [`google.rpc.Status`](https: +// //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) +// containing only `code` and `message`. 
+// +// * For Text Sentiment: +// In the created directory files `text_sentiment_1.jsonl`, +// `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl` +// will be created, where N may be 1, and depends on the +// total number of inputs and annotations found. +// +// Each .JSONL file will contain, per line, a JSON representation of a +// proto that wraps input text (or pdf) file in +// the text snippet (or document) proto and a list of +// zero or more AnnotationPayload protos (called annotations), which +// have text_sentiment detail populated. A single text (or pdf) file +// will be listed only once with all its annotations, and its +// annotations will never be split across files. +// +// If prediction for any text (or pdf) file failed (partially or +// completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,..., +// `errors_N.jsonl` files will be created (N depends on total number of +// failed predictions). These files will have a JSON representation of a +// proto that wraps input text (or pdf) file followed by exactly one +// +// [`google.rpc.Status`](https: +// //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) +// containing only `code` and `message`. +// +// * For Text Extraction: +// In the created directory files `text_extraction_1.jsonl`, +// `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl` +// will be created, where N may be 1, and depends on the +// total number of inputs and annotations found. +// The contents of these .JSONL file(s) depend on whether the input +// used inline text, or documents. +// If input was inline, then each .JSONL file will contain, per line, +// a JSON representation of a proto that wraps given in request text +// snippet's "id" (if specified), followed by input text snippet, +// and a list of zero or more +// AnnotationPayload protos (called annotations), which have +// text_extraction detail populated. 
A single text snippet will be +// listed only once with all its annotations, and its annotations will +// never be split across files. +// If input used documents, then each .JSONL file will contain, per +// line, a JSON representation of a proto that wraps given in request +// document proto, followed by its OCR-ed representation in the form +// of a text snippet, finally followed by a list of zero or more +// AnnotationPayload protos (called annotations), which have +// text_extraction detail populated and refer, via their indices, to +// the OCR-ed text snippet. A single document (and its text snippet) +// will be listed only once with all its annotations, and its +// annotations will never be split across files. +// If prediction for any text snippet failed (partially or completely), +// then additional `errors_1.jsonl`, `errors_2.jsonl`,..., +// `errors_N.jsonl` files will be created (N depends on total number of +// failed predictions). These files will have a JSON representation of a +// proto that wraps either the "id" : "" (in case of inline) +// or the document proto (in case of document) but here followed by +// exactly one [`google.rpc.Status`](https: +// //github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) +// containing only `code` and `message`. +message BatchPredictOutputConfig { + // The destination of the output. + oneof destination { + // Required. The Google Cloud Storage location of the directory where the + // output is to be written to. + GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED]; + } +} + +// Output configuration for ModelExport Action. +message ModelExportOutputConfig { + // The destination of the output. + oneof destination { + // Required. The Google Cloud Storage location where the model is to be + // written to. This location may only be set for the following model + // formats: + // "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml". 
+  //
+  // Under the directory given as the destination a new one with name
+  // "model-export-<model-display-name>-<timestamp-of-export-call>",
+  // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
+  // will be created. Inside the model and any of its supporting files
+  // will be written.
+    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
+  }
+
+  // The format in which the model must be exported. The available, and default,
+  // formats depend on the problem and model type (if given problem and type
+  // combination doesn't have a format listed, it means its models are not
+  // exportable):
+  //
+  // * For Image Classification mobile-low-latency-1, mobile-versatile-1,
+  //     mobile-high-accuracy-1:
+  //   "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js".
+  //
+  // * For Image Classification mobile-core-ml-low-latency-1,
+  //     mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
+  //   "core_ml" (default).
+  //
+  // * For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
+  //     mobile-high-accuracy-1:
+  //   "tflite", "tf_saved_model", "tf_js".
+  // Formats description:
+  //
+  // * tflite - Used for Android mobile devices.
+  // * edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
+  //                    devices.
+  // * tf_saved_model - A tensorflow model in SavedModel format.
+  // * tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
+  //           be used in the browser and in Node.js using JavaScript.
+  // * core_ml - Used for iOS mobile devices.
+  string model_format = 4;
+
+  // Additional model-type and format specific parameters describing the
+  // requirements for the to be exported model files, any string must be up to
+  // 25000 characters long.
+  map<string, string> params = 2;
+}
+
 // The Google Cloud Storage location for the input content.
 message GcsSource {
-  // Required. Google Cloud Storage URIs to input files, up to 2000 characters
-  // long. Accepted forms:
+  // Required.
Google Cloud Storage URIs to input files, up to 2000 + // characters long. Accepted forms: // * Full object path, e.g. gs://bucket/directory/object.csv - repeated string input_uris = 1; + repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED]; } // The Google Cloud Storage location where the output is to be written to. @@ -105,5 +888,5 @@ message GcsDestination { // * Prefix path: gs://bucket/directory // The requesting user must have write permission to the bucket. // The directory is created if it doesn't exist. - string output_uri_prefix = 1; + string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED]; } diff --git a/google/cloud/automl/v1/model.proto b/google/cloud/automl/v1/model.proto index 5f820b42..51ca9042 100644 --- a/google/cloud/automl/v1/model.proto +++ b/google/cloud/automl/v1/model.proto @@ -17,12 +17,15 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/api/resource.proto"; +import "google/cloud/automl/v1/image.proto"; +import "google/cloud/automl/v1/text.proto"; import "google/cloud/automl/v1/translation.proto"; import "google/protobuf/timestamp.proto"; import "google/api/annotations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; @@ -30,6 +33,11 @@ option ruby_package = "Google::Cloud::AutoML::V1"; // API proto representing a trained machine learning model. message Model { + option (google.api.resource) = { + type: "automl.googleapis.com/Model" + pattern: "projects/{project_id}/locations/{location_id}/models/{model_id}" + }; + // Deployment state of the model. enum DeploymentState { // Should not be used, an un-set enum has this value by default. 
@@ -48,6 +56,21 @@ message Model { oneof model_metadata { // Metadata for translation models. TranslationModelMetadata translation_model_metadata = 15; + + // Metadata for image classification models. + ImageClassificationModelMetadata image_classification_model_metadata = 13; + + // Metadata for text classification models. + TextClassificationModelMetadata text_classification_model_metadata = 14; + + // Metadata for image object detection models. + ImageObjectDetectionModelMetadata image_object_detection_model_metadata = 20; + + // Metadata for text extraction models. + TextExtractionModelMetadata text_extraction_model_metadata = 19; + + // Metadata for text sentiment models. + TextSentimentModelMetadata text_sentiment_model_metadata = 22; } // Output only. Resource name of the model. @@ -74,6 +97,10 @@ message Model { // prediction requests after it gets deployed. DeploymentState deployment_state = 8; + // Used to perform a consistent read-modify-write updates. If not set, a blind + // "overwrite" update happens. + string etag = 10; + // Optional. The labels with user-defined metadata to organize your model. 
// // Label keys and values can be no longer than 64 characters diff --git a/google/cloud/automl/v1/model_evaluation.proto b/google/cloud/automl/v1/model_evaluation.proto index fe9df1b9..149fa4e8 100644 --- a/google/cloud/automl/v1/model_evaluation.proto +++ b/google/cloud/automl/v1/model_evaluation.proto @@ -17,12 +17,17 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/api/annotations.proto"; +import "google/api/resource.proto"; +import "google/cloud/automl/v1/classification.proto"; +import "google/cloud/automl/v1/detection.proto"; +import "google/cloud/automl/v1/text_extraction.proto"; +import "google/cloud/automl/v1/text_sentiment.proto"; import "google/cloud/automl/v1/translation.proto"; import "google/protobuf/timestamp.proto"; -import "google/api/annotations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; @@ -32,8 +37,21 @@ option ruby_package = "Google::Cloud::AutoML::V1"; message ModelEvaluation { // Output only. Problem type specific evaluation metrics. oneof metrics { + // Model evaluation metrics for image, text classification. + ClassificationEvaluationMetrics classification_evaluation_metrics = 8; + // Model evaluation metrics for translation. TranslationEvaluationMetrics translation_evaluation_metrics = 9; + + // Model evaluation metrics for image object detection. + ImageObjectDetectionEvaluationMetrics + image_object_detection_evaluation_metrics = 12; + + // Evaluation metrics for text sentiment models. + TextSentimentEvaluationMetrics text_sentiment_evaluation_metrics = 11; + + // Evaluation metrics for text extraction models. 
+  TextExtractionEvaluationMetrics text_extraction_evaluation_metrics = 13;
   }
 
   // Output only. Resource name of the model evaluation.
@@ -42,10 +60,18 @@ message ModelEvaluation {
   // `projects/{project_id}/locations/{location_id}/models/{model_id}/modelEvaluations/{model_evaluation_id}`
   string name = 1;
 
-  // Output only. The ID of the annotation spec that the model evaluation applies to. The
-  // The ID is empty for the overall model evaluation.
+  // Output only. The ID of the annotation spec that the model evaluation
+  // applies to. The ID is empty for the overall model evaluation.
   string annotation_spec_id = 2;
 
+  // Output only. The value of
+  // [display_name][google.cloud.automl.v1.AnnotationSpec.display_name]
+  // at the moment when the model was trained. Because this field returns a
+  // value at model training time, for different models trained from the same
+  // dataset, the values may differ, since display names could have been
+  // changed between the two models' trainings.
+  string display_name = 15;
+
   // Output only. Timestamp when this model evaluation was created.
   google.protobuf.Timestamp create_time = 5;
 
@@ -57,6 +83,6 @@ message ModelEvaluation {
   // Otherwise, this is the count of examples that according to the ground
   // truth were annotated by the
   //
-  // [annotation_spec_id][google.cloud.automl.v1beta1.ModelEvaluation.annotation_spec_id].
+  // [annotation_spec_id][google.cloud.automl.v1.ModelEvaluation.annotation_spec_id].
int32 evaluated_example_count = 6; } diff --git a/google/cloud/automl/v1/operations.proto b/google/cloud/automl/v1/operations.proto index 575fd8a4..7c09d6ec 100644 --- a/google/cloud/automl/v1/operations.proto +++ b/google/cloud/automl/v1/operations.proto @@ -17,16 +17,19 @@ syntax = "proto3"; package google.cloud.automl.v1; +import "google/cloud/automl/v1/dataset.proto"; import "google/cloud/automl/v1/io.proto"; import "google/cloud/automl/v1/model.proto"; import "google/cloud/automl/v1/model_evaluation.proto"; +import "google/cloud/automl/v1/prediction_service.proto"; +import "google/cloud/automl/v1/service.proto"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/rpc/status.proto"; import "google/api/annotations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_package = "com.google.cloud.automl.v1"; option php_namespace = "Google\\Cloud\\AutoML\\V1"; @@ -40,8 +43,29 @@ message OperationMetadata { // Details of a Delete operation. DeleteOperationMetadata delete_details = 8; + // Details of a DeployModel operation. + DeployModelOperationMetadata deploy_model_details = 24; + + // Details of an UndeployModel operation. + UndeployModelOperationMetadata undeploy_model_details = 25; + // Details of CreateModel operation. CreateModelOperationMetadata create_model_details = 10; + + // Details of CreateDataset operation. + CreateDatasetOperationMetadata create_dataset_details = 30; + + // Details of ImportData operation. + ImportDataOperationMetadata import_data_details = 15; + + // Details of BatchPredict operation. + BatchPredictOperationMetadata batch_predict_details = 16; + + // Details of ExportData operation. 
+ ExportDataOperationMetadata export_data_details = 21; + + // Details of ExportModel operation. + ExportModelOperationMetadata export_model_details = 22; } // Output only. Progress of operation. Range: [0, 100]. @@ -66,7 +90,84 @@ message DeleteOperationMetadata { } +// Details of DeployModel operation. +message DeployModelOperationMetadata { + +} + +// Details of UndeployModel operation. +message UndeployModelOperationMetadata { + +} + +// Details of CreateDataset operation. +message CreateDatasetOperationMetadata { + +} + // Details of CreateModel operation. message CreateModelOperationMetadata { } + +// Details of ImportData operation. +message ImportDataOperationMetadata { + +} + +// Details of ExportData operation. +message ExportDataOperationMetadata { + // Further describes this export data's output. + // Supplements + // [OutputConfig][google.cloud.automl.v1.OutputConfig]. + message ExportDataOutputInfo { + // The output location to which the exported data is written. + oneof output_location { + // The full path of the Google Cloud Storage directory created, into which + // the exported data is written. + string gcs_output_directory = 1; + } + } + + // Output only. Information further describing this export data's output. + ExportDataOutputInfo output_info = 1; +} + +// Details of BatchPredict operation. +message BatchPredictOperationMetadata { + // Further describes this batch predict's output. + // Supplements + // + // [BatchPredictOutputConfig][google.cloud.automl.v1.BatchPredictOutputConfig]. + message BatchPredictOutputInfo { + // The output location into which prediction output is written. + oneof output_location { + // The full path of the Google Cloud Storage directory created, into which + // the prediction output is written. + string gcs_output_directory = 1; + } + } + + // Output only. The input config that was given upon starting this + // batch predict operation. + BatchPredictInputConfig input_config = 1; + + // Output only. 
Information further describing this batch predict's output. + BatchPredictOutputInfo output_info = 2; +} + +// Details of ExportModel operation. +message ExportModelOperationMetadata { + // Further describes the output of model export. + // Supplements + // [ModelExportOutputConfig][google.cloud.automl.v1.ModelExportOutputConfig]. + message ExportModelOutputInfo { + // The full path of the Google Cloud Storage directory created, into which + // the model will be exported. + string gcs_output_directory = 1; + } + + // Output only. Information further describing the output of this model + // export. + ExportModelOutputInfo output_info = 2; +} diff --git a/google/cloud/automl/v1/prediction_service.proto b/google/cloud/automl/v1/prediction_service.proto index a5f02873..9c175d9e 100644 --- a/google/cloud/automl/v1/prediction_service.proto +++ b/google/cloud/automl/v1/prediction_service.proto @@ -19,14 +19,14 @@ package google.cloud.automl.v1; import "google/api/annotations.proto"; import "google/api/client.proto"; +import "google/api/resource.proto"; import "google/cloud/automl/v1/annotation_payload.proto"; import "google/cloud/automl/v1/data_items.proto"; import "google/cloud/automl/v1/io.proto"; -import "google/cloud/automl/v1/operations.proto"; import "google/longrunning/operations.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_outer_classname = "PredictionServiceProto"; option java_package = "com.google.cloud.automl.v1"; @@ -45,14 +45,43 @@ service PredictionService { // Perform an online prediction. The prediction result will be directly // returned in the response. // Available for following ML problems, and their expected request payloads: + // * Image Classification - Image in .JPEG, .GIF or .PNG format, image_bytes + // up to 30MB. 
+ // * Image Object Detection - Image in .JPEG, .GIF or .PNG format, image_bytes + // up to 30MB. + // * Text Classification - TextSnippet, content up to 60,000 characters, + // UTF-8 encoded. + // * Text Extraction - TextSnippet, content up to 30,000 characters, + // UTF-8 NFC encoded. // * Translation - TextSnippet, content up to 25,000 characters, UTF-8 // encoded. + // * Text Sentiment - TextSnippet, content up 500 characters, UTF-8 + // encoded. rpc Predict(PredictRequest) returns (PredictResponse) { option (google.api.http) = { post: "/v1/{name=projects/*/locations/*/models/*}:predict" body: "*" }; } + + // Perform a batch prediction. Unlike the online + // [Predict][google.cloud.automl.v1.PredictionService.Predict], batch + // prediction result won't be immediately available in the response. Instead, + // a long running operation object is returned. User can poll the operation + // result via [GetOperation][google.longrunning.Operations.GetOperation] + // method. Once the operation is done, + // [BatchPredictResult][google.cloud.automl.v1.BatchPredictResult] is returned + // in the [response][google.longrunning.Operation.response] field. Available + // for following ML problems: + // * Image Classification + // * Image Object Detection + // * Text Extraction + rpc BatchPredict(BatchPredictRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { + post: "/v1/{name=projects/*/locations/*/models/*}:batchPredict" + body: "*" + }; + } } // Request message for @@ -67,6 +96,20 @@ message PredictRequest { // Additional domain-specific parameters, any string must be up to 25000 // characters long. + // + // * For Image Classification: + // + // `score_threshold` - (float) A value from 0.0 to 1.0. When the model + // makes predictions for an image, it will only produce results that have + // at least this confidence score. The default is 0.5. 
+  //
+  // * For Image Object Detection:
+  //   `score_threshold` - (float) When Model detects objects on the image,
+  //       it will only produce bounding boxes which have at least this
+  //       confidence score. Value in 0 to 1 range, default is 0.5.
+  //   `max_bounding_box_count` - (int64) No more than this number of bounding
+  //       boxes will be returned in the response. Default is 100, the
+  //       requested value may be limited by server.
   map<string, string> params = 3;
 }
 
@@ -77,6 +120,87 @@ message PredictResponse {
   // Translation and Text Sentiment will return precisely one payload.
   repeated AnnotationPayload payload = 1;
 
+  // The preprocessed example that AutoML actually makes prediction on.
+  // Empty if AutoML does not preprocess the input example.
+  // * For Text Extraction:
+  //   If the input is a .pdf file, the OCR'ed text will be provided in
+  //   [document_text][google.cloud.automl.v1p1beta.Document.document_text].
+  //
+  // * For Text Classification:
+  //   If the input is a .pdf file, the OCR'ed truncated text will be provided in
+  //   [document_text][google.cloud.automl.v1p1beta.Document.document_text].
+  //
+  // * For Text Sentiment:
+  //   If the input is a .pdf file, the OCR'ed truncated text will be provided in
+  //   [document_text][google.cloud.automl.v1p1beta.Document.document_text].
+  ExamplePayload preprocessed_input = 3;
+  // Additional domain-specific prediction response metadata.
+  //
+  // * For Image Object Detection:
+  //   `max_bounding_box_count` - (int64) At most that many bounding boxes per
+  //   image could have been returned.
+  //
+  // * For Text Sentiment:
+  //   `sentiment_score` - (float, deprecated) A value between -1 and 1,
+  //   -1 maps to least positive sentiment, while 1 maps to the most positive
+  //   one and the higher the score, the more positive the sentiment in the
+  //   document is. Yet these values are relative to the training data, so
+  //   e.g. if all data was positive then -1 will be also positive (though
+  //   the least).
+ // The sentiment_score shouldn't be confused with "score" or "magnitude" + // from the previous Natural Language Sentiment Analysis API. map metadata = 2; } + +// Request message for +// [PredictionService.BatchPredict][google.cloud.automl.v1.PredictionService.BatchPredict]. +message BatchPredictRequest { + // Name of the model requested to serve the batch prediction. + string name = 1; + + // Required. The input configuration for batch prediction. + BatchPredictInputConfig input_config = 3; + + // Required. The Configuration specifying where output predictions should + // be written. + BatchPredictOutputConfig output_config = 4; + + // Additional domain-specific parameters for the predictions, any string must + // be up to 25000 characters long. + // + // * For Text Classification: + // + // `score_threshold` - (float) A value from 0.0 to 1.0. When the model + // makes predictions for a text snippet, it will only produce results + // that have at least this confidence score. The default is 0.5. + // + // * For Image Classification: + // + // `score_threshold` - (float) A value from 0.0 to 1.0. When the model + // makes predictions for an image, it will only produce results that + // have at least this confidence score. The default is 0.5. + // + // * For Image Object Detection: + // + // `score_threshold` - (float) When Model detects objects on the image, + // it will only produce bounding boxes which have at least this + // confidence score. Value in 0 to 1 range, default is 0.5. + // `max_bounding_box_count` - (int64) No more than this number of bounding + // boxes will be produced per image. Default is 100, the + // requested value may be limited by server. + map params = 5; +} + +// Result of the Batch Predict. This message is returned in +// [response][google.longrunning.Operation.response] of the operation returned +// by the +// [PredictionService.BatchPredict][google.cloud.automl.v1.PredictionService.BatchPredict]. 
+message BatchPredictResult { + // Additional domain-specific prediction response metadata. + // + // * For Image Object Detection: + // `max_bounding_box_count` - (int64) At most that many bounding boxes per + // image could have been returned. + map metadata = 1; +} diff --git a/google/cloud/automl/v1/service.proto b/google/cloud/automl/v1/service.proto index c994d95b..36501f23 100644 --- a/google/cloud/automl/v1/service.proto +++ b/google/cloud/automl/v1/service.proto @@ -19,17 +19,19 @@ package google.cloud.automl.v1; import "google/api/annotations.proto"; import "google/api/client.proto"; +import "google/api/resource.proto"; import "google/cloud/automl/v1/annotation_payload.proto"; +import "google/cloud/automl/v1/annotation_spec.proto"; import "google/cloud/automl/v1/dataset.proto"; +import "google/cloud/automl/v1/image.proto"; import "google/cloud/automl/v1/io.proto"; import "google/cloud/automl/v1/model.proto"; import "google/cloud/automl/v1/model_evaluation.proto"; -import "google/cloud/automl/v1/operations.proto"; import "google/longrunning/operations.proto"; import "google/protobuf/field_mask.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_outer_classname = "AutoMlProto"; option java_package = "com.google.cloud.automl.v1"; @@ -116,6 +118,14 @@ service AutoMl { }; } + // Gets an annotation spec. + rpc GetAnnotationSpec(GetAnnotationSpecRequest) + returns (google.cloud.automl.v1.AnnotationSpec) { + option (google.api.http) = { + get: "/v1/{name=projects/*/locations/*/datasets/*/annotationSpecs/*}" + }; + } + // Creates a model. // Returns a Model in the [response][google.longrunning.Operation.response] // field when it completes. @@ -161,6 +171,54 @@ service AutoMl { }; } + // Deploys a model. 
If a model is already deployed, deploying it with the
+  // same parameters has no effect. Deploying with different parameters
+  // (as e.g. changing
+  //
+  // [node_number][google.cloud.automl.v1p1beta.ImageObjectDetectionModelDeploymentMetadata.node_number])
+  // will reset the deployment state without pausing the model's availability.
+  //
+  // Only applicable for Text Classification, Image Object Detection; all other
+  // domains manage deployment automatically.
+  //
+  // Returns an empty response in the
+  // [response][google.longrunning.Operation.response] field when it completes.
+  rpc DeployModel(DeployModelRequest) returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1/{name=projects/*/locations/*/models/*}:deploy"
+      body: "*"
+    };
+  }
+
+  // Undeploys a model. If the model is not deployed this method has no effect.
+  //
+  // Only applicable for Text Classification, Image Object Detection;
+  // all other domains manage deployment automatically.
+  //
+  // Returns an empty response in the
+  // [response][google.longrunning.Operation.response] field when it completes.
+  rpc UndeployModel(UndeployModelRequest)
+      returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1/{name=projects/*/locations/*/models/*}:undeploy"
+      body: "*"
+    };
+  }
+
+  // Exports a trained, "export-able", model to a user specified Google Cloud
+  // Storage location. A model is considered export-able if and only if it has
+  // an export format defined for it in
+  // [ModelExportOutputConfig][google.cloud.automl.v1.ModelExportOutputConfig].
+  //
+  // Returns an empty response in the
+  // [response][google.longrunning.Operation.response] field when it completes.
+  rpc ExportModel(ExportModelRequest) returns (google.longrunning.Operation) {
+    option (google.api.http) = {
+      post: "/v1/{name=projects/*/locations/*/models/*}:export"
+      body: "*"
+    };
+  }
+
+  // Gets a model evaluation.
rpc GetModelEvaluation(GetModelEvaluationRequest) returns (ModelEvaluation) { option (google.api.http) = { @@ -203,8 +261,8 @@ message ListDatasetsRequest { // An expression for filtering the results of the request. // // * `dataset_metadata` - for existence of the case (e.g. - // image_classification_dataset_metadata:*). - // Some examples of using the filter are: + // image_classification_dataset_metadata:*). Some examples of + // using the filter are: // // * `translation_dataset_metadata:*` --> The dataset has // translation_dataset_metadata. @@ -274,6 +332,13 @@ message ExportDataRequest { OutputConfig output_config = 3; } +// Request message for +// [AutoMl.GetAnnotationSpec][google.cloud.automl.v1.AutoMl.GetAnnotationSpec]. +message GetAnnotationSpecRequest { + // The resource name of the annotation spec to retrieve. + string name = 1; +} + // Request message for // [AutoMl.CreateModel][google.cloud.automl.v1.AutoMl.CreateModel]. message CreateModelRequest { @@ -300,7 +365,7 @@ message ListModelsRequest { // An expression for filtering the results of the request. // // * `model_metadata` - for existence of the case (e.g. - // video_classification_model_metadata:*). + // image_classification_model_metadata:*). // * `dataset_id` - for = or !=. Some examples of using the filter are: // // * `image_classification_model_metadata:*` --> The model has @@ -349,6 +414,42 @@ message UpdateModelRequest { google.protobuf.FieldMask update_mask = 2; } +// Request message for +// [AutoMl.DeployModel][google.cloud.automl.v1.AutoMl.DeployModel]. +message DeployModelRequest { + // The per-domain specific deployment parameters. + oneof model_deployment_metadata { + // Model deployment metadata specific to Image Object Detection. + ImageObjectDetectionModelDeploymentMetadata + image_object_detection_model_deployment_metadata = 2; + + // Model deployment metadata specific to Image Classification. 
+ ImageClassificationModelDeploymentMetadata + image_classification_model_deployment_metadata = 4; + } + + // Resource name of the model to deploy. + string name = 1; +} + +// Request message for +// [AutoMl.UndeployModel][google.cloud.automl.v1.AutoMl.UndeployModel]. +message UndeployModelRequest { + // Resource name of the model to undeploy. + string name = 1; +} + +// Request message for +// [AutoMl.ExportModel][google.cloud.automl.v1.AutoMl.ExportModel]. Models need +// to be enabled for exporting, otherwise an error code will be returned. +message ExportModelRequest { + // Required. The resource name of the model to export. + string name = 1; + + // Required. The desired output location and configuration. + ModelExportOutputConfig output_config = 3; +} + // Request message for // [AutoMl.GetModelEvaluation][google.cloud.automl.v1.AutoMl.GetModelEvaluation]. message GetModelEvaluationRequest { diff --git a/google/cloud/automl/v1/text.proto b/google/cloud/automl/v1/text.proto new file mode 100644 index 00000000..7c47d9b3 --- /dev/null +++ b/google/cloud/automl/v1/text.proto @@ -0,0 +1,60 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+syntax = "proto3";
+
+package google.cloud.automl.v1;
+
+import "google/api/annotations.proto";
+import "google/cloud/automl/v1/classification.proto";
+
+option csharp_namespace = "Google.Cloud.AutoML.V1";
+option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl";
+option java_multiple_files = true;
+option java_outer_classname = "TextProto";
+option java_package = "com.google.cloud.automl.v1";
+option php_namespace = "Google\\Cloud\\AutoML\\V1";
+option ruby_package = "Google::Cloud::AutoML::V1";
+
+// Dataset metadata for classification.
+message TextClassificationDatasetMetadata {
+  // Required. Type of the classification problem.
+  ClassificationType classification_type = 1;
+}
+
+// Model metadata that is specific to text classification.
+message TextClassificationModelMetadata {
+  // Output only. Classification type of the dataset used to train this model.
+  ClassificationType classification_type = 3;
+}
+
+// Dataset metadata that is specific to text extraction.
+message TextExtractionDatasetMetadata {}
+
+// Model metadata that is specific to text extraction.
+message TextExtractionModelMetadata {}
+
+// Dataset metadata for text sentiment.
+message TextSentimentDatasetMetadata {
+  // Required. A sentiment is expressed as an integer ordinal, where higher
+  // value means a more positive sentiment. The range of sentiments that will be
+  // used is between 0 and sentiment_max (inclusive on both ends), and all the
+  // values in the range must be represented in the dataset before a model can
+  // be created. sentiment_max value must be between 1 and 10 (inclusive).
+  int32 sentiment_max = 1;
+}
+
+// Model metadata that is specific to text sentiment.
+message TextSentimentModelMetadata {} diff --git a/google/cloud/automl/v1/text_extraction.proto b/google/cloud/automl/v1/text_extraction.proto new file mode 100644 index 00000000..f7e10ca8 --- /dev/null +++ b/google/cloud/automl/v1/text_extraction.proto @@ -0,0 +1,70 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +syntax = "proto3"; + +package google.cloud.automl.v1; + +import "google/cloud/automl/v1/text_segment.proto"; +import "google/api/annotations.proto"; + +option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; +option java_multiple_files = true; +option java_package = "com.google.cloud.automl.v1"; +option php_namespace = "Google\\Cloud\\AutoML\\V1"; +option ruby_package = "Google::Cloud::AutoML::V1"; + +// Annotation for identifying spans of text. +message TextExtractionAnnotation { + // Required. Text extraction annotations can either be a text segment or a + // text relation. + oneof annotation { + // An entity annotation will set this, which is the part of the original + // text to which the annotation pertains. + TextSegment text_segment = 3; + } + + // Output only. A confidence estimate between 0.0 and 1.0. A higher value + // means greater confidence in correctness of the annotation. + float score = 1; +} + +// Model evaluation metrics for text extraction problems. 
+message TextExtractionEvaluationMetrics { + // Metrics for a single confidence threshold. + message ConfidenceMetricsEntry { + // Output only. The confidence threshold value used to compute the metrics. + // Only annotations with score of at least this threshold are considered to + // be ones the model would return. + float confidence_threshold = 1; + + // Output only. Recall under the given confidence threshold. + float recall = 3; + + // Output only. Precision under the given confidence threshold. + float precision = 4; + + // Output only. The harmonic mean of recall and precision. + float f1_score = 5; + } + + // Output only. The Area under precision recall curve metric. + float au_prc = 1; + + // Output only. Metrics that have confidence thresholds. + // Precision-recall curve can be derived from it. + repeated ConfidenceMetricsEntry confidence_metrics_entries = 2; +} diff --git a/google/cloud/automl/v1/text_segment.proto b/google/cloud/automl/v1/text_segment.proto new file mode 100644 index 00000000..cb75fc04 --- /dev/null +++ b/google/cloud/automl/v1/text_segment.proto @@ -0,0 +1,43 @@ +// Copyright 2019 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+
+syntax = "proto3";
+
+package google.cloud.automl.v1;
+
+import "google/api/annotations.proto";
+
+option csharp_namespace = "Google.Cloud.AutoML.V1";
+option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl";
+option java_multiple_files = true;
+option java_outer_classname = "TextSegmentProto";
+option java_package = "com.google.cloud.automl.v1";
+option php_namespace = "Google\\Cloud\\AutoML\\V1";
+option ruby_package = "Google::Cloud::AutoML::V1";
+
+// A contiguous part of a text (string), assuming it has a UTF-8 NFC encoding.
+message TextSegment {
+  // Output only. The content of the TextSegment.
+  string content = 3;
+
+  // Required. Zero-based character index of the first character of the text
+  // segment (counting characters from the beginning of the text).
+  int64 start_offset = 1;
+
+  // Required. Zero-based character index of the first character past the end of
+  // the text segment (counting characters from the beginning of the text).
+  // The character at the end_offset is NOT included in the text segment.
+  int64 end_offset = 2;
+}
diff --git a/google/cloud/automl/v1/text_sentiment.proto b/google/cloud/automl/v1/text_sentiment.proto
new file mode 100644
index 00000000..f87f300b
--- /dev/null
+++ b/google/cloud/automl/v1/text_sentiment.proto
@@ -0,0 +1,79 @@
+// Copyright 2019 Google LLC.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+syntax = "proto3";
+
+package google.cloud.automl.v1;
+
+import "google/api/annotations.proto";
+import "google/cloud/automl/v1/classification.proto";
+
+option csharp_namespace = "Google.Cloud.AutoML.V1";
+option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl";
+option java_multiple_files = true;
+option java_outer_classname = "TextSentimentProto";
+option java_package = "com.google.cloud.automl.v1";
+option php_namespace = "Google\\Cloud\\AutoML\\V1";
+option ruby_package = "Google::Cloud::AutoML::V1";
+
+// Contains annotation details specific to text sentiment.
+message TextSentimentAnnotation {
+  // Output only. The sentiment with the semantic, as given to the
+  // [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] when
+  // populating the dataset from which the model used for the prediction had
+  // been trained. The sentiment values are between 0 and
+  // Dataset.text_sentiment_dataset_metadata.sentiment_max (inclusive),
+  // with higher value meaning more positive sentiment. They are completely
+  // relative, i.e. 0 means least positive sentiment and sentiment_max means
+  // the most positive from the sentiments present in the train data. Therefore
+  // e.g. if train data had only negative sentiment, then sentiment_max would
+  // still be negative (although least negative).
+  // The sentiment shouldn't be confused with "score" or "magnitude"
+  // from the previous Natural Language Sentiment Analysis API.
+  int32 sentiment = 1;
+}
+
+// Model evaluation metrics for text sentiment problems.
+message TextSentimentEvaluationMetrics {
+  // Output only. Precision.
+  float precision = 1;
+
+  // Output only. Recall.
+  float recall = 2;
+
+  // Output only. The harmonic mean of recall and precision.
+  float f1_score = 3;
+
+  // Output only. Mean absolute error. Only set for the overall model
+  // evaluation, not for evaluation of a single annotation spec.
+  float mean_absolute_error = 4;
+
+  // Output only. 
Mean squared error. Only set for the overall model + // evaluation, not for evaluation of a single annotation spec. + float mean_squared_error = 5; + + // Output only. Linear weighted kappa. Only set for the overall model + // evaluation, not for evaluation of a single annotation spec. + float linear_kappa = 6; + + // Output only. Quadratic weighted kappa. Only set for the overall model + // evaluation, not for evaluation of a single annotation spec. + float quadratic_kappa = 7; + + // Output only. Confusion matrix of the evaluation. + // Only set for the overall model evaluation, not for evaluation of a single + // annotation spec. + ClassificationEvaluationMetrics.ConfusionMatrix confusion_matrix = 8; +} diff --git a/google/cloud/automl/v1/translation.proto b/google/cloud/automl/v1/translation.proto index bc449fe7..bb67e3d4 100644 --- a/google/cloud/automl/v1/translation.proto +++ b/google/cloud/automl/v1/translation.proto @@ -17,11 +17,12 @@ syntax = "proto3"; package google.cloud.automl.v1; -import "google/cloud/automl/v1/data_items.proto"; import "google/api/annotations.proto"; +import "google/api/field_behavior.proto"; +import "google/cloud/automl/v1/data_items.proto"; -option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option csharp_namespace = "Google.Cloud.AutoML.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl"; option java_multiple_files = true; option java_outer_classname = "TranslationProto"; option java_package = "com.google.cloud.automl.v1"; @@ -31,10 +32,10 @@ option ruby_package = "Google::Cloud::AutoML::V1"; // Dataset metadata that is specific to translation. message TranslationDatasetMetadata { // Required. The BCP-47 language code of the source language. - string source_language_code = 1; + string source_language_code = 1 [(google.api.field_behavior) = REQUIRED]; // Required. The BCP-47 language code of the target language. 
- string target_language_code = 2; + string target_language_code = 2 [(google.api.field_behavior) = REQUIRED]; } // Evaluation metrics for the dataset.