diff --git a/google/cloud/vision/v1/image_annotator.proto b/google/cloud/vision/v1/image_annotator.proto index acd7b268..f8d82084 100644 --- a/google/cloud/vision/v1/image_annotator.proto +++ b/google/cloud/vision/v1/image_annotator.proto @@ -103,6 +103,9 @@ message Feature { // Run web detection. WEB_DETECTION = 10; + + // Run localizer for object detection. + OBJECT_LOCALIZATION = 19; } // The feature type. @@ -410,6 +413,26 @@ message EntityAnnotation { repeated Property properties = 9; } +// A single detected object with its bounding polygon. +message LocalizedObjectAnnotation { + // Object ID that should align with EntityAnnotation mid. + string mid = 1; + + // The BCP-47 language code, such as "en-US" or "sr-Latn". For more + // information, see + // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier. + string language_code = 2; + + // Object name, expressed in its `language_code` language. + string name = 3; + + // Score of the result. Range [0, 1]. + float score = 4; + + // Image region to which this object belongs. This must be populated. + BoundingPoly bounding_poly = 5; +} + // Set of features pertaining to the image, computed by computer vision // methods over safe-search verticals (for example, adult, spoof, medical, // violence). @@ -569,6 +592,10 @@ message AnnotateImageResponse { // If present, label detection has completed successfully. repeated EntityAnnotation label_annotations = 4; + // If present, localized object detection has completed successfully. + // This will be sorted descending by confidence score. + repeated LocalizedObjectAnnotation localized_object_annotations = 22; + // If present, text (OCR) detection has completed successfully. repeated EntityAnnotation text_annotations = 5;