Add proto for cloud vision API.

2016-07-06 09:36:47 -07:00 · 2016-07-06 09:36:47 -07:00 · 0bc7040c28
parent 72ebf10ad5
commit 0bc7040c28
1 changed files with 501 additions and 0 deletions
--- a/google/cloud/vision/v1/image_annotator.proto
+++ b/google/cloud/vision/v1/image_annotator.proto
@ -0,0 +1,501 @@
+// Copyright 2016 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.cloud.vision.v1;
+
+import "google/api/annotations.proto";
+import "google/cloud/vision/v1/geometry.proto";
+import "google/cloud/vision/v1/image_context_search_extension.proto";
+import "google/cloud/vision/v1/query_annotation.proto";
+import "google/rpc/status.proto";
+import "google/type/color.proto";
+import "google/type/latlng.proto";
+
+option cc_enable_arenas = true;
+option java_multiple_files = true;
+option java_outer_classname = "ImageAnnotatorProto";
+option java_package = "com.google.cloud.vision.v1";
+
+
+// Service that performs Google Cloud Vision API detection tasks, such as face,
+// landmark, logo, label, and text detection, over client images, and returns
+// detected entities from the images.
+service ImageAnnotator {
+  // Run image detection and annotation for a batch of images.
+  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
+    option (google.api.http) = { post: "/v1/images:annotate" body: "*" };
+  }
+}
+
+// The <em>Feature</em> indicates what type of image detection task to perform.
+// Users describe the type of Google Cloud Vision API tasks to perform over
+// images by using <em>Feature</em>s. Features encode the Cloud Vision API
+// vertical to operate on and the number of top-scoring results to return.
+message Feature {
+  // Type of image feature.
+  enum Type {
+    // Unspecified feature type.
+    TYPE_UNSPECIFIED = 0;
+
+    // Run face detection.
+    FACE_DETECTION = 1;
+
+    // Run landmark detection.
+    LANDMARK_DETECTION = 2;
+
+    // Run logo detection.
+    LOGO_DETECTION = 3;
+
+    // Run label detection.
+    LABEL_DETECTION = 4;
+
+    // Run OCR.
+    TEXT_DETECTION = 5;
+
+    // Run various computer vision models to compute image safe-search properties.
+    SAFE_SEARCH_DETECTION = 6;
+
+    // Compute a set of properties about the image (such as the image's dominant colors).
+    IMAGE_PROPERTIES = 7;
+  }
+
+  // The feature type.
+  Type type = 1;
+
+  // Maximum number of results of this type.
+  int32 max_results = 2;
+}
+
+// External image source (Google Cloud Storage image location).
+message ImageSource {
+  // Google Cloud Storage image URI. It must be in the following form:
+  // `gs://bucket_name/object_name`. For more
+  // details, please see: https://cloud.google.com/storage/docs/reference-uris.
+  // NOTE: Cloud Storage object versioning is not supported!
+  string gcs_image_uri = 1;
+}
+
+// Client image to perform Google Cloud Vision API tasks over.
+message Image {
+  // Image content, represented as a stream of bytes.
+  // Note: as with all `bytes` fields, protobuffers use a pure binary
+  // representation, whereas JSON representations use base64.
+  bytes content = 1;
+
+  // Google Cloud Storage image location. If both 'content' and 'source'
+  // are filled for an image, 'content' takes precedence and it will be
+  // used for performing the image annotation request.
+  ImageSource source = 2;
+}
+
+// A face annotation object contains the results of face detection.
+message FaceAnnotation {
+  // A face-specific landmark (for example, a face feature).
+  // Landmark positions may fall outside the bounds of the image
+  // when the face is near one or more edges of the image.
+  // Therefore it is NOT guaranteed that 0 <= x < width or 0 <= y < height.
+  message Landmark {
+    // Face landmark (feature) type.
+    // Left and right are defined from the vantage of the viewer of the image,
+    // without considering mirror projections typical of photos. So, LEFT_EYE,
+    // typically is the person's right eye.
+    enum Type {
+      // Unknown face landmark detected. Should not be filled.
+      UNKNOWN_LANDMARK = 0;
+
+      // Left eye.
+      LEFT_EYE = 1;
+
+      // Right eye.
+      RIGHT_EYE = 2;
+
+      // Left of left eyebrow.
+      LEFT_OF_LEFT_EYEBROW = 3;
+
+      // Right of left eyebrow.
+      RIGHT_OF_LEFT_EYEBROW = 4;
+
+      // Left of right eyebrow.
+      LEFT_OF_RIGHT_EYEBROW = 5;
+
+      // Right of right eyebrow.
+      RIGHT_OF_RIGHT_EYEBROW = 6;
+
+      // Midpoint between eyes.
+      MIDPOINT_BETWEEN_EYES = 7;
+
+      // Nose tip.
+      NOSE_TIP = 8;
+
+      // Upper lip.
+      UPPER_LIP = 9;
+
+      // Lower lip.
+      LOWER_LIP = 10;
+
+      // Mouth left.
+      MOUTH_LEFT = 11;
+
+      // Mouth right.
+      MOUTH_RIGHT = 12;
+
+      // Mouth center.
+      MOUTH_CENTER = 13;
+
+      // Nose, bottom right.
+      NOSE_BOTTOM_RIGHT = 14;
+
+      // Nose, bottom left.
+      NOSE_BOTTOM_LEFT = 15;
+
+      // Nose, bottom center.
+      NOSE_BOTTOM_CENTER = 16;
+
+      // Left eye, top boundary.
+      LEFT_EYE_TOP_BOUNDARY = 17;
+
+      // Left eye, right corner.
+      LEFT_EYE_RIGHT_CORNER = 18;
+
+      // Left eye, bottom boundary.
+      LEFT_EYE_BOTTOM_BOUNDARY = 19;
+
+      // Left eye, left corner.
+      LEFT_EYE_LEFT_CORNER = 20;
+
+      // Right eye, top boundary.
+      RIGHT_EYE_TOP_BOUNDARY = 21;
+
+      // Right eye, right corner.
+      RIGHT_EYE_RIGHT_CORNER = 22;
+
+      // Right eye, bottom boundary.
+      RIGHT_EYE_BOTTOM_BOUNDARY = 23;
+
+      // Right eye, left corner.
+      RIGHT_EYE_LEFT_CORNER = 24;
+
+      // Left eyebrow, upper midpoint.
+      LEFT_EYEBROW_UPPER_MIDPOINT = 25;
+
+      // Right eyebrow, upper midpoint.
+      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
+
+      // Left ear tragion.
+      LEFT_EAR_TRAGION = 27;
+
+      // Right ear tragion.
+      RIGHT_EAR_TRAGION = 28;
+
+      // Left eye pupil.
+      LEFT_EYE_PUPIL = 29;
+
+      // Right eye pupil.
+      RIGHT_EYE_PUPIL = 30;
+
+      // Forehead glabella.
+      FOREHEAD_GLABELLA = 31;
+
+      // Chin gnathion.
+      CHIN_GNATHION = 32;
+
+      // Chin left gonion.
+      CHIN_LEFT_GONION = 33;
+
+      // Chin right gonion.
+      CHIN_RIGHT_GONION = 34;
+    }
+
+    // Face landmark type.
+    Type type = 3;
+
+    // Face landmark position.
+    google.cloud.vision.v1.Position position = 4;
+  }
+
+  // The bounding polygon around the face. The coordinates of the bounding box
+  // are in the original image's scale, as returned in ImageParams.
+  // The bounding box is computed to "frame" the face in accordance with human
+  // expectations. It is based on the landmarker results.
+  // Note that one or more x and/or y coordinates may not be generated in the
+  // BoundingPoly (the polygon will be unbounded) if only a partial face appears in
+  // the image to be annotated.
+  google.cloud.vision.v1.BoundingPoly bounding_poly = 1;
+
+  // This bounding polygon is tighter than the previous
+  // <code>boundingPoly</code>, and
+  // encloses only the skin part of the face. Typically, it is used to
+  // eliminate the face from any image analysis that detects the
+  // "amount of skin" visible in an image. It is not based on the
+  // landmarker results, only on the initial face detection, hence
+  // the <code>fd</code> (face detection) prefix.
+  google.cloud.vision.v1.BoundingPoly fd_bounding_poly = 2;
+
+  // Detected face landmarks.
+  repeated Landmark landmarks = 3;
+
+  // Roll angle. Indicates the amount of clockwise/anti-clockwise rotation of
+  // the
+  // face relative to the image vertical, about the axis perpendicular to the
+  // face. Range [-180,180].
+  float roll_angle = 4;
+
+  // Yaw angle. Indicates the leftward/rightward angle that the face is
+  // pointing, relative to the vertical plane perpendicular to the image. Range
+  // [-180,180].
+  float pan_angle = 5;
+
+  // Pitch angle. Indicates the upwards/downwards angle that the face is
+  // pointing
+  // relative to the image's horizontal plane. Range [-180,180].
+  float tilt_angle = 6;
+
+  // Detection confidence. Range [0, 1].
+  float detection_confidence = 7;
+
+  // Face landmarking confidence. Range [0, 1].
+  float landmarking_confidence = 8;
+
+  // Joy likelihood.
+  Likelihood joy_likelihood = 9;
+
+  // Sorrow likelihood.
+  Likelihood sorrow_likelihood = 10;
+
+  // Anger likelihood.
+  Likelihood anger_likelihood = 11;
+
+  // Surprise likelihood.
+  Likelihood surprise_likelihood = 12;
+
+  // Under-exposed likelihood.
+  Likelihood under_exposed_likelihood = 13;
+
+  // Blurred likelihood.
+  Likelihood blurred_likelihood = 14;
+
+  // Headwear likelihood.
+  Likelihood headwear_likelihood = 15;
+}
+
+// Detected entity location information.
+message LocationInfo {
+  // Lat - long location coordinates.
+  google.type.LatLng lat_lng = 1;
+}
+
+// Arbitrary name/value pair.
+message Property {
+  // Name of the property.
+  string name = 1;
+
+  // Value of the property.
+  string value = 2;
+}
+
+// Set of detected entity features.
+message EntityAnnotation {
+  // Opaque entity ID. Some IDs might be available in Knowledge Graph(KG).
+  // For more details on KG please see:
+  // https://developers.google.com/knowledge-graph/
+  string mid = 1;
+
+  // The language code for the locale in which the entity textual
+  // <code>description</code> (next field) is expressed.
+  string locale = 2;
+
+  // Entity textual description, expressed in its <code>locale</code> language.
+  string description = 3;
+
+  // Overall score of the result. Range [0, 1].
+  float score = 4;
+
+  // The accuracy of the entity detection in an image.
+  // For example, for an image containing 'Eiffel Tower,' this field represents
+  // the confidence that there is a tower in the query image. Range [0, 1].
+  float confidence = 5;
+
+  // The relevancy of the ICA (Image Content Annotation) label to the
+  // image. For example, the relevancy of 'tower' to an image containing
+  // 'Eiffel Tower' is likely higher than an image containing a distant towering
+  // building, though the confidence that there is a tower may be the same.
+  // Range [0, 1].
+  float topicality = 6;
+
+  // Image region to which this entity belongs. Not filled currently
+  // for `LABEL_DETECTION` features. For `TEXT_DETECTION` (OCR), `boundingPoly`s
+  // are produced for the entire text detected in an image region, followed by
+  // `boundingPoly`s for each word within the detected text.
+  google.cloud.vision.v1.BoundingPoly bounding_poly = 7;
+
+  // The location information for the detected entity. Multiple
+  // <code>LocationInfo</code> elements can be present since one location may
+  // indicate the location of the scene in the query image, and another the
+  // location of the place where the query image was taken. Location information
+  // is usually present for landmarks.
+  repeated LocationInfo locations = 8;
+
+  // Some entities can have additional optional <code>Property</code> fields.
+  // For example a different kind of score or string that qualifies the entity.
+  repeated Property properties = 9;
+}
+
+// Set of features pertaining to the image, computed by various computer vision
+// methods over safe-search verticals (for example, adult, spoof, medical,
+// violence).
+message SafeSearchAnnotation {
+  // Represents the adult contents likelihood for the image.
+  Likelihood adult = 1;
+
+  // Spoof likelihood. The likelihood that an obvious modification
+  // was made to the image's canonical version to make it appear
+  // funny or offensive.
+  Likelihood spoof = 2;
+
+  // Likelihood this is a medical image.
+  Likelihood medical = 3;
+
+  // Violence likelihood.
+  Likelihood violence = 4;
+}
+
+// Rectangle determined by min and max LatLng pairs.
+message LatLongRect {
+  // Min lat/long pair.
+  google.type.LatLng min_lat_lng = 1;
+
+  // Max lat/long pair.
+  google.type.LatLng max_lat_lng = 2;
+}
+
+// Color information consists of RGB channels, score and fraction of
+// image the color occupies in the image.
+message ColorInfo {
+  // RGB components of the color.
+  google.type.Color color = 1;
+
+  // Image-specific score for this color. Value in range [0, 1].
+  float score = 2;
+
+  // Stores the fraction of pixels the color occupies in the image.
+  // Value in range [0, 1].
+  float pixel_fraction = 3;
+}
+
+// Set of dominant colors and their corresponding scores.
+message DominantColorsAnnotation {
+  // RGB color values, with their score and pixel fraction.
+  repeated ColorInfo colors = 1;
+}
+
+// Stores image properties (e.g. dominant colors).
+message ImageProperties {
+  // If present, dominant colors completed successfully.
+  DominantColorsAnnotation dominant_colors = 1;
+}
+
+// Image context.
+message ImageContext {
+  // Lat/long rectangle that specifies the location of the image.
+  LatLongRect lat_long_rect = 1;
+
+  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
+  // yields the best results since it enables automatic language detection. For
+  // languages based on the Latin alphabet, setting `language_hints` is not
+  // needed. In rare cases, when the language of the text in the image is known,
+  // setting a hint will help get better results (although it will be a
+  // significant hindrance if the hint is wrong). Text detection returns an
+  // error if one or more of the specified languages is not one of the
+  // [supported
+  // languages](/translate/v2/translate-reference#supported_languages).
+  repeated string language_hints = 2;
+}
+
+// Request for performing Google Cloud Vision API tasks over a user-provided
+// image, with user-requested features.
+message AnnotateImageRequest {
+  // The image to be processed.
+  Image image = 1;
+
+  // Requested features.
+  repeated Feature features = 2;
+
+  // Additional context that may accompany the image.
+  ImageContext image_context = 3;
+}
+
+// Response to an image annotation request.
+message AnnotateImageResponse {
+  // If present, face detection completed successfully.
+  repeated FaceAnnotation face_annotations = 1;
+
+  // If present, landmark detection completed successfully.
+  repeated EntityAnnotation landmark_annotations = 2;
+
+  // If present, logo detection completed successfully.
+  repeated EntityAnnotation logo_annotations = 3;
+
+  // If present, label detection completed successfully.
+  repeated EntityAnnotation label_annotations = 4;
+
+  // If present, text (OCR) detection completed successfully.
+  repeated EntityAnnotation text_annotations = 5;
+
+  // If present, safe-search annotation completed successfully.
+  SafeSearchAnnotation safe_search_annotation = 6;
+
+  // If present, image properties were extracted successfully.
+  ImageProperties image_properties_annotation = 8;
+
+  // If set, represents the error message for the operation.
+  // Note that filled-in mage annotations are guaranteed to be
+  // correct, even when <code>error</code> is non-empty.
+  google.rpc.Status error = 9;
+}
+
+// Multiple image annotation requests are batched into a single service call.
+message BatchAnnotateImagesRequest {
+  // Individual image annotation requests for this batch.
+  repeated AnnotateImageRequest requests = 1;
+}
+
+// Response to a batch image annotation request.
+message BatchAnnotateImagesResponse {
+  // Individual responses to image annotation requests within the batch.
+  repeated AnnotateImageResponse responses = 1;
+}
+
+// A bucketized representation of likelihood meant to give our clients highly
+// stable results across model upgrades.
+enum Likelihood {
+  // Unknown likelihood.
+  UNKNOWN = 0;
+
+  // The image very unlikely belongs to the vertical specified.
+  VERY_UNLIKELY = 1;
+
+  // The image unlikely belongs to the vertical specified.
+  UNLIKELY = 2;
+
+  // The image possibly belongs to the vertical specified.
+  POSSIBLE = 3;
+
+  // The image likely belongs to the vertical specified.
+  LIKELY = 4;
+
+  // The image very likely belongs to the vertical specified.
+  VERY_LIKELY = 5;
+}