// googleapis/google/cloud/datalabeling/v1beta1/dataset.proto

// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/annotations.proto";
import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "google.golang.org/genproto/googleapis/cloud/datalabeling/v1beta1;datalabeling";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

// The type of data being imported, as declared in `InputConfig.data_type`.
//
// NOTE(review): value numbers 3 and 5 are not used by this enum. If they
// belonged to values that were removed, they should be marked `reserved` so
// they are never reused -- confirm against the API history.
enum DataType {
  // Default value. The data type has not been specified.
  DATA_TYPE_UNSPECIFIED = 0;

  // Image data. Allowed for continuous evaluation.
  IMAGE = 1;

  // Video data. Unlike the other data types, this one is not marked as
  // allowed for continuous evaluation.
  VIDEO = 2;

  // Text data. Allowed for continuous evaluation.
  TEXT = 4;

  // General data. Allowed for continuous evaluation.
  GENERAL_DATA = 6;
}

// Dataset is the resource that holds your data. You can request multiple
// labeling tasks for a dataset; each task generates an AnnotatedDataset.
message Dataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Dataset"
    pattern: "projects/{project}/datasets/{dataset}"
  };

  // Output only. Dataset resource name, in the format:
  // projects/{project_id}/datasets/{dataset_id}
  string name = 1;

  // Required. The display name of the dataset. Maximum of 64 characters.
  string display_name = 2;

  // Optional. User-provided description of the dataset.
  // The description can be up to 10000 characters long.
  string description = 3;

  // Output only. Time the dataset was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. The original input configs that ImportData was called with.
  // This field is populated only after clients have imported data into this
  // dataset.
  repeated InputConfig input_configs = 5;

  // Output only. The names of any related resources that are blocking changes
  // to the dataset.
  repeated string blocking_resources = 6;

  // Output only. The number of data items in the dataset.
  int64 data_item_count = 7;
}

// The configuration of input data, including its data type, its location, and
// optional metadata about the data and its annotations.
message InputConfig {
  // Optional. The metadata associated with each data type.
  oneof data_type_metadata {
    // Required for text import, because the language code must be specified.
    TextMetadata text_metadata = 6;
  }

  // Required. Where the data is from.
  oneof source {
    // Source located in Cloud Storage.
    GcsSource gcs_source = 2;

    // Source located in BigQuery. You must specify this field if you are using
    // this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
    BigQuerySource bigquery_source = 5;
  }

  // Required. The data type must be specified when the user imports data.
  DataType data_type = 1;

  // Optional. The type of annotation to be performed on this data. You must
  // specify this field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
  AnnotationType annotation_type = 3;

  // Optional. Metadata about annotations for the input. You must specify this
  // field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] for a
  // model version that performs classification.
  ClassificationMetadata classification_metadata = 4;
}

// Metadata for text data.
message TextMetadata {
  // The language of this text, as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
  // Default value is en-US.
  string language_code = 1;
}

// Metadata for classification annotations.
message ClassificationMetadata {
  // Whether the classification task is multi-label (true) or not (false).
  bool is_multi_label = 1;
}

// Source of the Cloud Storage file to be imported.
message GcsSource {
  // Required. The input URI of the source file. This must be a Cloud Storage
  // path (`gs://...`).
  string input_uri = 1;

  // Required. The format of the source file. Only "text/csv" is supported.
  string mime_type = 2;
}

// The BigQuery location for input data. If used in an
// [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob], this is
// where the service saves the prediction input and output sampled from the
// model version.
message BigQuerySource {
  // Required. BigQuery URI to a table, up to 2,000 characters long. If you
  // specify the URI of a table that does not exist, Data Labeling Service
  // creates a table at the URI with the correct schema when you create your
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob]. If you
  // specify the URI of a table that already exists, it must have the
  // [correct
  // schema](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  //
  // Provide the table URI in the following format:
  //
  // "bq://<var>{your_project_id}</var>/<var>{your_dataset_name}</var>/<var>{your_table_name}</var>"
  //
  // [Learn
  // more](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  string input_uri = 1;
}

// The configuration of output data.
message OutputConfig {
  // Required. Location to output data to.
  oneof destination {
    // Output to a file in Cloud Storage. Use this for labeling output other
    // than image segmentation.
    GcsDestination gcs_destination = 1;

    // Output to a folder in Cloud Storage. Use this for image segmentation
    // labeling output.
    GcsFolderDestination gcs_folder_destination = 2;
  }
}

// Export destination of the data. Only a Cloud Storage path is allowed in
// output_uri.
message GcsDestination {
  // Required. The output URI of the destination file.
  string output_uri = 1;

  // Required. The format of the Cloud Storage destination. Only "text/csv"
  // and "application/json" are supported.
  string mime_type = 2;
}

// Export folder destination of the data.
message GcsFolderDestination {
  // Required. The Cloud Storage directory to export data to.
  string output_folder_uri = 1;
}

// DataItem is a piece of data, without annotation. For example, an image.
message DataItem {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/DataItem"
    pattern: "projects/{project}/datasets/{dataset}/dataItems/{data_item}"
  };

  // Output only. The content of the data item. At most one of the following
  // payloads is set.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of the text content.
    TextPayload text_payload = 3;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 4;
  }

  // Output only. Name of the data item, in the format:
  // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
  string name = 1;
}

// AnnotatedDataset is a set holding annotations for data in a Dataset. Each
// labeling task generates an AnnotatedDataset under the Dataset that the task
// is requested for.
message AnnotatedDataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/AnnotatedDataset"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}"
  };

  // Output only. AnnotatedDataset resource name, in the format:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}
  string name = 1;

  // Output only. The display name of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when the user starts a labeling task. Maximum of 64
  // characters.
  string display_name = 2;

  // Output only. The description of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when the user starts a labeling task. Maximum of
  // 10000 characters.
  string description = 9;

  // Output only. Source of the annotation.
  AnnotationSource annotation_source = 3;

  // Output only. Type of the annotation. It is specified when starting the
  // labeling task.
  AnnotationType annotation_type = 8;

  // Output only. Number of examples in the annotated dataset.
  int64 example_count = 4;

  // Output only. Number of examples in the annotated dataset that have
  // annotations.
  int64 completed_example_count = 5;

  // Output only. Per-label statistics.
  LabelStats label_stats = 6;

  // Output only. Time the AnnotatedDataset was created.
  google.protobuf.Timestamp create_time = 7;

  // Output only. Additional information about the AnnotatedDataset.
  AnnotatedDatasetMetadata metadata = 10;

  // Output only. The names of any related resources that are blocking changes
  // to the annotated dataset.
  repeated string blocking_resources = 11;
}

// Statistics about annotation specs.
message LabelStats {
  // Map of each annotation spec's example count. The key is the annotation
  // spec name and the value is the number of examples for that annotation
  // spec. If the annotated dataset does not have an annotation spec, the map
  // contains a pair where the key is the empty string and the value is the
  // total number of annotations.
  map<string, int64> example_count = 1;
}

// Metadata about an AnnotatedDataset: the configuration that was used when
// its labeling task was requested.
message AnnotatedDatasetMetadata {
  // Specific request configuration used when requesting the labeling task.
  // At most one of the following configurations is set.
  oneof annotation_request_config {
    // Configuration for image classification task.
    ImageClassificationConfig image_classification_config = 2;

    // Configuration for image bounding box and bounding poly task.
    BoundingPolyConfig bounding_poly_config = 3;

    // Configuration for image polyline task.
    PolylineConfig polyline_config = 4;

    // Configuration for image segmentation task.
    SegmentationConfig segmentation_config = 5;

    // Configuration for video classification task.
    VideoClassificationConfig video_classification_config = 6;

    // Configuration for video object detection task.
    ObjectDetectionConfig object_detection_config = 7;

    // Configuration for video object tracking task.
    ObjectTrackingConfig object_tracking_config = 8;

    // Configuration for video event labeling task.
    EventConfig event_config = 9;

    // Configuration for text classification task.
    TextClassificationConfig text_classification_config = 10;

    // Configuration for text entity extraction task.
    TextEntityExtractionConfig text_entity_extraction_config = 11;
  }

  // The HumanAnnotationConfig used when requesting the human labeling task for
  // this AnnotatedDataset.
  HumanAnnotationConfig human_annotation_config = 1;
}

// An Example is a piece of data together with its annotations. For example, an
// image with the label "house".
message Example {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Example"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}/examples/{example}"
  };

  // Output only. The data part of the Example. At most one of the following
  // payloads is set.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of the text content.
    TextPayload text_payload = 6;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 7;
  }

  // Output only. Name of the example, in the format:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}/examples/{example_id}
  string name = 1;

  // Output only. Annotations for the piece of data in the Example.
  // One piece of data can have multiple annotations.
  repeated Annotation annotations = 5;
}