Add proto for cloud vision API.

This commit is contained in:
Ethan Bao 2016-07-06 09:36:47 -07:00
parent 72ebf10ad5
commit 0bc7040c28
1 changed files with 501 additions and 0 deletions

View File

@ -0,0 +1,501 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.vision.v1;
import "google/api/annotations.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/image_context_search_extension.proto";
import "google/cloud/vision/v1/query_annotation.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";
option cc_enable_arenas = true;
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
// Service that performs Google Cloud Vision API detection tasks, such as face,
// landmark, logo, label, and text detection, over client images, and returns
// detected entities from the images.
service ImageAnnotator {
// Run image detection and annotation for a batch of images.
rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
option (google.api.http) = { post: "/v1/images:annotate" body: "*" };
}
}
// The <em>Feature</em> indicates what type of image detection task to perform.
// Users describe the type of Google Cloud Vision API tasks to perform over
// images by using <em>Feature</em>s. Features encode the Cloud Vision API
// vertical to operate on and the number of top-scoring results to return.
message Feature {
// Type of image feature.
enum Type {
// Unspecified feature type.
TYPE_UNSPECIFIED = 0;
// Run face detection.
FACE_DETECTION = 1;
// Run landmark detection.
LANDMARK_DETECTION = 2;
// Run logo detection.
LOGO_DETECTION = 3;
// Run label detection.
LABEL_DETECTION = 4;
// Run OCR.
TEXT_DETECTION = 5;
// Run various computer vision models to compute image safe-search properties.
SAFE_SEARCH_DETECTION = 6;
// Compute a set of properties about the image (such as the image's dominant colors).
IMAGE_PROPERTIES = 7;
}
// The feature type.
Type type = 1;
// Maximum number of results of this type.
int32 max_results = 2;
}
// External image source (Google Cloud Storage image location).
message ImageSource {
// Google Cloud Storage image URI. It must be in the following form:
// `gs://bucket_name/object_name`. For more
// details, please see: https://cloud.google.com/storage/docs/reference-uris.
// NOTE: Cloud Storage object versioning is not supported!
string gcs_image_uri = 1;
}
// Client image to perform Google Cloud Vision API tasks over.
message Image {
// Image content, represented as a stream of bytes.
// Note: as with all `bytes` fields, protobuffers use a pure binary
// representation, whereas JSON representations use base64.
bytes content = 1;
// Google Cloud Storage image location. If both 'content' and 'source'
// are filled for an image, 'content' takes precedence and it will be
// used for performing the image annotation request.
ImageSource source = 2;
}
// A face annotation object contains the results of face detection.
message FaceAnnotation {
// A face-specific landmark (for example, a face feature).
// Landmark positions may fall outside the bounds of the image
// when the face is near one or more edges of the image.
// Therefore it is NOT guaranteed that 0 <= x < width or 0 <= y < height.
message Landmark {
// Face landmark (feature) type.
// Left and right are defined from the vantage of the viewer of the image,
// without considering mirror projections typical of photos. So, LEFT_EYE,
// typically is the person's right eye.
enum Type {
// Unknown face landmark detected. Should not be filled.
UNKNOWN_LANDMARK = 0;
// Left eye.
LEFT_EYE = 1;
// Right eye.
RIGHT_EYE = 2;
// Left of left eyebrow.
LEFT_OF_LEFT_EYEBROW = 3;
// Right of left eyebrow.
RIGHT_OF_LEFT_EYEBROW = 4;
// Left of right eyebrow.
LEFT_OF_RIGHT_EYEBROW = 5;
// Right of right eyebrow.
RIGHT_OF_RIGHT_EYEBROW = 6;
// Midpoint between eyes.
MIDPOINT_BETWEEN_EYES = 7;
// Nose tip.
NOSE_TIP = 8;
// Upper lip.
UPPER_LIP = 9;
// Lower lip.
LOWER_LIP = 10;
// Mouth left.
MOUTH_LEFT = 11;
// Mouth right.
MOUTH_RIGHT = 12;
// Mouth center.
MOUTH_CENTER = 13;
// Nose, bottom right.
NOSE_BOTTOM_RIGHT = 14;
// Nose, bottom left.
NOSE_BOTTOM_LEFT = 15;
// Nose, bottom center.
NOSE_BOTTOM_CENTER = 16;
// Left eye, top boundary.
LEFT_EYE_TOP_BOUNDARY = 17;
// Left eye, right corner.
LEFT_EYE_RIGHT_CORNER = 18;
// Left eye, bottom boundary.
LEFT_EYE_BOTTOM_BOUNDARY = 19;
// Left eye, left corner.
LEFT_EYE_LEFT_CORNER = 20;
// Right eye, top boundary.
RIGHT_EYE_TOP_BOUNDARY = 21;
// Right eye, right corner.
RIGHT_EYE_RIGHT_CORNER = 22;
// Right eye, bottom boundary.
RIGHT_EYE_BOTTOM_BOUNDARY = 23;
// Right eye, left corner.
RIGHT_EYE_LEFT_CORNER = 24;
// Left eyebrow, upper midpoint.
LEFT_EYEBROW_UPPER_MIDPOINT = 25;
// Right eyebrow, upper midpoint.
RIGHT_EYEBROW_UPPER_MIDPOINT = 26;
// Left ear tragion.
LEFT_EAR_TRAGION = 27;
// Right ear tragion.
RIGHT_EAR_TRAGION = 28;
// Left eye pupil.
LEFT_EYE_PUPIL = 29;
// Right eye pupil.
RIGHT_EYE_PUPIL = 30;
// Forehead glabella.
FOREHEAD_GLABELLA = 31;
// Chin gnathion.
CHIN_GNATHION = 32;
// Chin left gonion.
CHIN_LEFT_GONION = 33;
// Chin right gonion.
CHIN_RIGHT_GONION = 34;
}
// Face landmark type.
Type type = 3;
// Face landmark position.
google.cloud.vision.v1.Position position = 4;
}
// The bounding polygon around the face. The coordinates of the bounding box
// are in the original image's scale, as returned in ImageParams.
// The bounding box is computed to "frame" the face in accordance with human
// expectations. It is based on the landmarker results.
// Note that one or more x and/or y coordinates may not be generated in the
// BoundingPoly (the polygon will be unbounded) if only a partial face appears in
// the image to be annotated.
google.cloud.vision.v1.BoundingPoly bounding_poly = 1;
// This bounding polygon is tighter than the previous
// <code>boundingPoly</code>, and
// encloses only the skin part of the face. Typically, it is used to
// eliminate the face from any image analysis that detects the
// "amount of skin" visible in an image. It is not based on the
// landmarker results, only on the initial face detection, hence
// the <code>fd</code> (face detection) prefix.
google.cloud.vision.v1.BoundingPoly fd_bounding_poly = 2;
// Detected face landmarks.
repeated Landmark landmarks = 3;
// Roll angle. Indicates the amount of clockwise/anti-clockwise rotation of
// the
// face relative to the image vertical, about the axis perpendicular to the
// face. Range [-180,180].
float roll_angle = 4;
// Yaw angle. Indicates the leftward/rightward angle that the face is
// pointing, relative to the vertical plane perpendicular to the image. Range
// [-180,180].
float pan_angle = 5;
// Pitch angle. Indicates the upwards/downwards angle that the face is
// pointing
// relative to the image's horizontal plane. Range [-180,180].
float tilt_angle = 6;
// Detection confidence. Range [0, 1].
float detection_confidence = 7;
// Face landmarking confidence. Range [0, 1].
float landmarking_confidence = 8;
// Joy likelihood.
Likelihood joy_likelihood = 9;
// Sorrow likelihood.
Likelihood sorrow_likelihood = 10;
// Anger likelihood.
Likelihood anger_likelihood = 11;
// Surprise likelihood.
Likelihood surprise_likelihood = 12;
// Under-exposed likelihood.
Likelihood under_exposed_likelihood = 13;
// Blurred likelihood.
Likelihood blurred_likelihood = 14;
// Headwear likelihood.
Likelihood headwear_likelihood = 15;
}
// Detected entity location information.
message LocationInfo {
// Lat - long location coordinates.
google.type.LatLng lat_lng = 1;
}
// Arbitrary name/value pair.
message Property {
// Name of the property.
string name = 1;
// Value of the property.
string value = 2;
}
// Set of detected entity features.
message EntityAnnotation {
// Opaque entity ID. Some IDs might be available in Knowledge Graph(KG).
// For more details on KG please see:
// https://developers.google.com/knowledge-graph/
string mid = 1;
// The language code for the locale in which the entity textual
// <code>description</code> (next field) is expressed.
string locale = 2;
// Entity textual description, expressed in its <code>locale</code> language.
string description = 3;
// Overall score of the result. Range [0, 1].
float score = 4;
// The accuracy of the entity detection in an image.
// For example, for an image containing 'Eiffel Tower,' this field represents
// the confidence that there is a tower in the query image. Range [0, 1].
float confidence = 5;
// The relevancy of the ICA (Image Content Annotation) label to the
// image. For example, the relevancy of 'tower' to an image containing
// 'Eiffel Tower' is likely higher than an image containing a distant towering
// building, though the confidence that there is a tower may be the same.
// Range [0, 1].
float topicality = 6;
// Image region to which this entity belongs. Not filled currently
// for `LABEL_DETECTION` features. For `TEXT_DETECTION` (OCR), `boundingPoly`s
// are produced for the entire text detected in an image region, followed by
// `boundingPoly`s for each word within the detected text.
google.cloud.vision.v1.BoundingPoly bounding_poly = 7;
// The location information for the detected entity. Multiple
// <code>LocationInfo</code> elements can be present since one location may
// indicate the location of the scene in the query image, and another the
// location of the place where the query image was taken. Location information
// is usually present for landmarks.
repeated LocationInfo locations = 8;
// Some entities can have additional optional <code>Property</code> fields.
// For example a different kind of score or string that qualifies the entity.
repeated Property properties = 9;
}
// Set of features pertaining to the image, computed by various computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
// Represents the adult contents likelihood for the image.
Likelihood adult = 1;
// Spoof likelihood. The likelihood that an obvious modification
// was made to the image's canonical version to make it appear
// funny or offensive.
Likelihood spoof = 2;
// Likelihood this is a medical image.
Likelihood medical = 3;
// Violence likelihood.
Likelihood violence = 4;
}
// Rectangle determined by min and max LatLng pairs.
message LatLongRect {
// Min lat/long pair.
google.type.LatLng min_lat_lng = 1;
// Max lat/long pair.
google.type.LatLng max_lat_lng = 2;
}
// Color information consists of RGB channels, score and fraction of
// image the color occupies in the image.
message ColorInfo {
// RGB components of the color.
google.type.Color color = 1;
// Image-specific score for this color. Value in range [0, 1].
float score = 2;
// Stores the fraction of pixels the color occupies in the image.
// Value in range [0, 1].
float pixel_fraction = 3;
}
// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
// RGB color values, with their score and pixel fraction.
repeated ColorInfo colors = 1;
}
// Stores image properties (e.g. dominant colors).
message ImageProperties {
// If present, dominant colors completed successfully.
DominantColorsAnnotation dominant_colors = 1;
}
// Image context.
message ImageContext {
// Lat/long rectangle that specifies the location of the image.
LatLongRect lat_long_rect = 1;
// List of languages to use for TEXT_DETECTION. In most cases, an empty value
// yields the best results since it enables automatic language detection. For
// languages based on the Latin alphabet, setting `language_hints` is not
// needed. In rare cases, when the language of the text in the image is known,
// setting a hint will help get better results (although it will be a
// significant hindrance if the hint is wrong). Text detection returns an
// error if one or more of the specified languages is not one of the
// [supported
// languages](/translate/v2/translate-reference#supported_languages).
repeated string language_hints = 2;
}
// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
// The image to be processed.
Image image = 1;
// Requested features.
repeated Feature features = 2;
// Additional context that may accompany the image.
ImageContext image_context = 3;
}
// Response to an image annotation request.
message AnnotateImageResponse {
// If present, face detection completed successfully.
repeated FaceAnnotation face_annotations = 1;
// If present, landmark detection completed successfully.
repeated EntityAnnotation landmark_annotations = 2;
// If present, logo detection completed successfully.
repeated EntityAnnotation logo_annotations = 3;
// If present, label detection completed successfully.
repeated EntityAnnotation label_annotations = 4;
// If present, text (OCR) detection completed successfully.
repeated EntityAnnotation text_annotations = 5;
// If present, safe-search annotation completed successfully.
SafeSearchAnnotation safe_search_annotation = 6;
// If present, image properties were extracted successfully.
ImageProperties image_properties_annotation = 8;
// If set, represents the error message for the operation.
// Note that filled-in mage annotations are guaranteed to be
// correct, even when <code>error</code> is non-empty.
google.rpc.Status error = 9;
}
// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
// Individual image annotation requests for this batch.
repeated AnnotateImageRequest requests = 1;
}
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
// Individual responses to image annotation requests within the batch.
repeated AnnotateImageResponse responses = 1;
}
// A bucketized representation of likelihood meant to give our clients highly
// stable results across model upgrades.
enum Likelihood {
// Unknown likelihood.
UNKNOWN = 0;
// The image very unlikely belongs to the vertical specified.
VERY_UNLIKELY = 1;
// The image unlikely belongs to the vertical specified.
UNLIKELY = 2;
// The image possibly belongs to the vertical specified.
POSSIBLE = 3;
// The image likely belongs to the vertical specified.
LIKELY = 4;
// The image very likely belongs to the vertical specified.
VERY_LIKELY = 5;
}