googleapis/google/privacy/dlp/v2/dlp.proto

3114 lines
109 KiB
Protocol Buffer
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
syntax = "proto3";
package google.privacy.dlp.v2;
import "google/api/annotations.proto";
import "google/privacy/dlp/v2/storage.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";
import "google/type/dayofweek.proto";
import "google/type/timeofday.proto";
option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "google.golang.org/genproto/googleapis/privacy/dlp/v2;dlp";
option java_multiple_files = true;
option java_outer_classname = "DlpProto";
option java_package = "com.google.privacy.dlp.v2";
option php_namespace = "Google\\Cloud\\Dlp\\V2";
// Cloud Data Loss Prevention (DLP) API.
//
// Lets clients detect Personally Identifiable Information (PII) and other
// privacy-sensitive data in user-supplied, unstructured data streams such as
// text blocks or images. The service also offers methods for redacting
// sensitive data and for scheduling scans of data sets hosted on Google Cloud
// Platform.
//
// Concepts and how-to guides are available at
// https://cloud.google.com/dlp/docs/.
service DlpService {
  // Looks for potentially sensitive info in content.
  // Input size, processing time, and output size are all limited.
  //
  // If the request names no InfoTypes or CustomInfoTypes, the system chooses
  // which detectors to run on its own. By default that may be every type, and
  // the choice may change over time as detectors are updated.
  //
  // How-to guides: https://cloud.google.com/dlp/docs/inspecting-images
  // and https://cloud.google.com/dlp/docs/inspecting-text.
  rpc InspectContent(InspectContentRequest) returns (InspectContentResponse) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/content:inspect"
      body: "*"
    };
  }

  // Removes potentially sensitive info from an image.
  // Input size, processing time, and output size are all limited.
  // Learn more at
  // https://cloud.google.com/dlp/docs/redacting-sensitive-data-images.
  //
  // If the request names no InfoTypes or CustomInfoTypes, the system chooses
  // which detectors to run on its own. By default that may be every type, and
  // the choice may change over time as detectors are updated.
  rpc RedactImage(RedactImageRequest) returns (RedactImageResponse) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/image:redact"
      body: "*"
    };
  }

  // De-identifies potentially sensitive info in a ContentItem.
  // Input size and output size are limited.
  // Learn more at https://cloud.google.com/dlp/docs/deidentify-sensitive-data.
  //
  // If the request names no InfoTypes or CustomInfoTypes, the system chooses
  // which detectors to run on its own. By default that may be every type, and
  // the choice may change over time as detectors are updated.
  rpc DeidentifyContent(DeidentifyContentRequest)
      returns (DeidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/content:deidentify"
      body: "*"
    };
  }

  // Re-identifies content that was previously de-identified.
  // Learn more at
  // https://cloud.google.com/dlp/docs/pseudonymization#re-identification_in_free_text_code_example
  rpc ReidentifyContent(ReidentifyContentRequest)
      returns (ReidentifyContentResponse) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/content:reidentify"
      body: "*"
    };
  }

  // Lists the sensitive information types supported by the DLP API.
  // Learn more at https://cloud.google.com/dlp/docs/infotypes-reference.
  rpc ListInfoTypes(ListInfoTypesRequest) returns (ListInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2/infoTypes"
    };
  }

  // Creates an InspectTemplate so that frequently used configuration for
  // inspecting content, images, and storage can be re-used.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates.
  rpc CreateInspectTemplate(CreateInspectTemplateRequest)
      returns (InspectTemplate) {
    option (google.api.http) = {
      post: "/v2/{parent=organizations/*}/inspectTemplates"
      body: "*"
      additional_bindings {
        post: "/v2/{parent=projects/*}/inspectTemplates"
        body: "*"
      }
    };
  }

  // Updates an InspectTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates.
  rpc UpdateInspectTemplate(UpdateInspectTemplateRequest)
      returns (InspectTemplate) {
    option (google.api.http) = {
      patch: "/v2/{name=organizations/*/inspectTemplates/*}"
      body: "*"
      additional_bindings {
        patch: "/v2/{name=projects/*/inspectTemplates/*}"
        body: "*"
      }
    };
  }

  // Retrieves an InspectTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates.
  rpc GetInspectTemplate(GetInspectTemplateRequest) returns (InspectTemplate) {
    option (google.api.http) = {
      get: "/v2/{name=organizations/*/inspectTemplates/*}"
      additional_bindings {
        get: "/v2/{name=projects/*/inspectTemplates/*}"
      }
    };
  }

  // Lists InspectTemplates.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates.
  rpc ListInspectTemplates(ListInspectTemplatesRequest)
      returns (ListInspectTemplatesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=organizations/*}/inspectTemplates"
      additional_bindings {
        get: "/v2/{parent=projects/*}/inspectTemplates"
      }
    };
  }

  // Deletes an InspectTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates.
  rpc DeleteInspectTemplate(DeleteInspectTemplateRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2/{name=organizations/*/inspectTemplates/*}"
      additional_bindings {
        delete: "/v2/{name=projects/*/inspectTemplates/*}"
      }
    };
  }

  // Creates a DeidentifyTemplate so that frequently used configuration for
  // de-identifying content, images, and storage can be re-used.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates-deid.
  rpc CreateDeidentifyTemplate(CreateDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      post: "/v2/{parent=organizations/*}/deidentifyTemplates"
      body: "*"
      additional_bindings {
        post: "/v2/{parent=projects/*}/deidentifyTemplates"
        body: "*"
      }
    };
  }

  // Updates a DeidentifyTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates-deid.
  rpc UpdateDeidentifyTemplate(UpdateDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      patch: "/v2/{name=organizations/*/deidentifyTemplates/*}"
      body: "*"
      additional_bindings {
        patch: "/v2/{name=projects/*/deidentifyTemplates/*}"
        body: "*"
      }
    };
  }

  // Retrieves a DeidentifyTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates-deid.
  rpc GetDeidentifyTemplate(GetDeidentifyTemplateRequest)
      returns (DeidentifyTemplate) {
    option (google.api.http) = {
      get: "/v2/{name=organizations/*/deidentifyTemplates/*}"
      additional_bindings {
        get: "/v2/{name=projects/*/deidentifyTemplates/*}"
      }
    };
  }

  // Lists DeidentifyTemplates.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates-deid.
  rpc ListDeidentifyTemplates(ListDeidentifyTemplatesRequest)
      returns (ListDeidentifyTemplatesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=organizations/*}/deidentifyTemplates"
      additional_bindings {
        get: "/v2/{parent=projects/*}/deidentifyTemplates"
      }
    };
  }

  // Deletes a DeidentifyTemplate.
  // Learn more at https://cloud.google.com/dlp/docs/creating-templates-deid.
  rpc DeleteDeidentifyTemplate(DeleteDeidentifyTemplateRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2/{name=organizations/*/deidentifyTemplates/*}"
      additional_bindings {
        delete: "/v2/{name=projects/*/deidentifyTemplates/*}"
      }
    };
  }

  // Creates a job trigger that runs DLP actions, such as scanning storage for
  // sensitive information, on a set schedule.
  // Learn more at https://cloud.google.com/dlp/docs/creating-job-triggers.
  rpc CreateJobTrigger(CreateJobTriggerRequest) returns (JobTrigger) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/jobTriggers"
      body: "*"
    };
  }

  // Updates a job trigger.
  // Learn more at https://cloud.google.com/dlp/docs/creating-job-triggers.
  rpc UpdateJobTrigger(UpdateJobTriggerRequest) returns (JobTrigger) {
    option (google.api.http) = {
      patch: "/v2/{name=projects/*/jobTriggers/*}"
      body: "*"
    };
  }

  // Retrieves a job trigger.
  // Learn more at https://cloud.google.com/dlp/docs/creating-job-triggers.
  rpc GetJobTrigger(GetJobTriggerRequest) returns (JobTrigger) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/jobTriggers/*}"
    };
  }

  // Lists job triggers.
  // Learn more at https://cloud.google.com/dlp/docs/creating-job-triggers.
  rpc ListJobTriggers(ListJobTriggersRequest)
      returns (ListJobTriggersResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*}/jobTriggers"
    };
  }

  // Deletes a job trigger.
  // Learn more at https://cloud.google.com/dlp/docs/creating-job-triggers.
  rpc DeleteJobTrigger(DeleteJobTriggerRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/jobTriggers/*}"
    };
  }

  // Activates a job trigger. The trigger is executed immediately instead of
  // waiting for the trigger event to occur.
  rpc ActivateJobTrigger(ActivateJobTriggerRequest) returns (DlpJob) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/jobTriggers/*}:activate"
      body: "*"
    };
  }

  // Creates a new job that inspects storage or calculates risk metrics.
  // Learn more at https://cloud.google.com/dlp/docs/inspecting-storage and
  // https://cloud.google.com/dlp/docs/compute-risk-analysis.
  //
  // If an inspect job names no InfoTypes or CustomInfoTypes, the system
  // chooses which detectors to run on its own. By default that may be every
  // type, and the choice may change over time as detectors are updated.
  rpc CreateDlpJob(CreateDlpJobRequest) returns (DlpJob) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*}/dlpJobs"
      body: "*"
    };
  }

  // Lists the DlpJobs matching the filter given in the request.
  // Learn more at https://cloud.google.com/dlp/docs/inspecting-storage and
  // https://cloud.google.com/dlp/docs/compute-risk-analysis.
  rpc ListDlpJobs(ListDlpJobsRequest) returns (ListDlpJobsResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*}/dlpJobs"
    };
  }

  // Retrieves the latest state of a long-running DlpJob.
  // Learn more at https://cloud.google.com/dlp/docs/inspecting-storage and
  // https://cloud.google.com/dlp/docs/compute-risk-analysis.
  rpc GetDlpJob(GetDlpJobRequest) returns (DlpJob) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/dlpJobs/*}"
    };
  }

  // Deletes a long-running DlpJob. Calling this method signals that the
  // client is no longer interested in the DlpJob result. The job is cancelled
  // if possible.
  // Learn more at https://cloud.google.com/dlp/docs/inspecting-storage and
  // https://cloud.google.com/dlp/docs/compute-risk-analysis.
  rpc DeleteDlpJob(DeleteDlpJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/dlpJobs/*}"
    };
  }

  // Begins asynchronous cancellation of a long-running DlpJob. The server
  // makes a best effort to cancel the DlpJob, but success is not guaranteed.
  // Learn more at https://cloud.google.com/dlp/docs/inspecting-storage and
  // https://cloud.google.com/dlp/docs/compute-risk-analysis.
  rpc CancelDlpJob(CancelDlpJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/dlpJobs/*}:cancel"
      body: "*"
    };
  }

  // Creates a pre-built stored infoType to be used for inspection.
  // Learn more at https://cloud.google.com/dlp/docs/creating-stored-infotypes.
  rpc CreateStoredInfoType(CreateStoredInfoTypeRequest)
      returns (StoredInfoType) {
    option (google.api.http) = {
      post: "/v2/{parent=organizations/*}/storedInfoTypes"
      body: "*"
      additional_bindings {
        post: "/v2/{parent=projects/*}/storedInfoTypes"
        body: "*"
      }
    };
  }

  // Updates a stored infoType by creating a new version. The existing version
  // stays in use until the new version is ready.
  // Learn more at https://cloud.google.com/dlp/docs/creating-stored-infotypes.
  rpc UpdateStoredInfoType(UpdateStoredInfoTypeRequest)
      returns (StoredInfoType) {
    option (google.api.http) = {
      patch: "/v2/{name=organizations/*/storedInfoTypes/*}"
      body: "*"
      additional_bindings {
        patch: "/v2/{name=projects/*/storedInfoTypes/*}"
        body: "*"
      }
    };
  }

  // Retrieves a stored infoType.
  // Learn more at https://cloud.google.com/dlp/docs/creating-stored-infotypes.
  rpc GetStoredInfoType(GetStoredInfoTypeRequest) returns (StoredInfoType) {
    option (google.api.http) = {
      get: "/v2/{name=organizations/*/storedInfoTypes/*}"
      additional_bindings {
        get: "/v2/{name=projects/*/storedInfoTypes/*}"
      }
    };
  }

  // Lists stored infoTypes.
  // Learn more at https://cloud.google.com/dlp/docs/creating-stored-infotypes.
  rpc ListStoredInfoTypes(ListStoredInfoTypesRequest)
      returns (ListStoredInfoTypesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=organizations/*}/storedInfoTypes"
      additional_bindings {
        get: "/v2/{parent=projects/*}/storedInfoTypes"
      }
    };
  }

  // Deletes a stored infoType.
  // Learn more at https://cloud.google.com/dlp/docs/creating-stored-infotypes.
  rpc DeleteStoredInfoType(DeleteStoredInfoTypeRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v2/{name=organizations/*/storedInfoTypes/*}"
      additional_bindings {
        delete: "/v2/{name=projects/*/storedInfoTypes/*}"
      }
    };
  }
}
// List of infoTypes used for exclusion.
message ExcludeInfoTypes {
  // When used in an ExclusionRule, a finding is dropped if it overlaps with,
  // or is contained within, a finding of an infoType from this list. For
  // example, if `InspectionRuleSet.info_types` contains "PHONE_NUMBER" and
  // `exclusion_rule` contains `exclude_info_types.info_types` with
  // "EMAIL_ADDRESS", phone number findings are dropped when they overlap with
  // an EMAIL_ADDRESS finding. As a result, "555-222-2222@example.org"
  // generates only a single finding, namely the email address.
  repeated InfoType info_types = 1;
}
// Rule stating the conditions under which findings of the infoTypes listed in
// `InspectionRuleSet` are removed from the results.
message ExclusionRule {
  // The criterion defining the rule; at most one member may be set.
  oneof type {
    // Dictionary that defines the rule.
    CustomInfoType.Dictionary dictionary = 1;

    // Regular expression that defines the rule.
    CustomInfoType.Regex regex = 2;

    // Set of infoTypes whose findings affect this rule.
    ExcludeInfoTypes exclude_info_types = 3;
  }

  // How the rule is applied; see the MatchingType documentation for details.
  MatchingType matching_type = 4;
}
// Selects which parts of the provided content are scanned.
enum ContentOption {
  // The entire content of a file or a data stream is included.
  CONTENT_UNSPECIFIED = 0;

  // Text content within the data; metadata is excluded.
  CONTENT_TEXT = 1;

  // Images found in the data.
  CONTENT_IMAGE = 2;
}
// One inspection rule, applied to the infoTypes specified in
// `InspectionRuleSet`.
message InspectionRule {
  // The kind of rule; at most one member may be set.
  oneof type {
    // Detection rule based on hotwords.
    CustomInfoType.DetectionRule.HotwordRule hotword_rule = 1;

    // Rule that excludes findings.
    ExclusionRule exclusion_rule = 2;
  }
}
// A set of rules that modifies a set of infoTypes, altering behavior under
// certain circumstances according to the specific details of the rules in the
// set.
message InspectionRuleSet {
  // The infoTypes this rule set applies to.
  repeated InfoType info_types = 1;

  // The rules applied to the infoTypes. Rules are applied in order.
  repeated InspectionRule rules = 2;
}
// Describes how the scanning process is configured.
// When used with redactContent, only info_types and min_likelihood are
// currently used.
message InspectConfig {
  // Limits on the number of findings that are returned.
  message FindingLimits {
    // Maximum-findings configuration for one infoType, per content item or
    // per long-running DlpJob.
    message InfoTypeLimit {
      // The type of information this findings limit applies to. Provide only
      // one limit per info_type. When an InfoTypeLimit carries no info_type,
      // the DLP API applies the limit against all info_types that are found
      // but not specified in another InfoTypeLimit.
      InfoType info_type = 1;

      // Maximum number of findings for the given infoType.
      int32 max_findings = 2;
    }

    // Maximum number of findings returned for each item scanned.
    // Within `InspectDataSourceRequest`, at most 2000 are returned even if
    // this is set higher.
    // Within `InspectContentRequest`, this field is ignored.
    int32 max_findings_per_item = 1;

    // Maximum number of findings returned per request/job.
    // Within `InspectContentRequest`, at most 2000 are returned even if this
    // is set higher.
    int32 max_findings_per_request = 2;

    // Findings limits configured for specific infoTypes.
    repeated InfoTypeLimit max_findings_per_info_type = 3;
  }

  // Restricts which info_types are looked for. Values must correspond to
  // InfoType values returned by ListInfoTypes or listed at
  // https://cloud.google.com/dlp/docs/infotypes-reference.
  //
  // If a request names no InfoTypes or CustomInfoTypes, the system may choose
  // which detectors to run on its own. By default that may be every type, and
  // the choice may change over time as detectors are updated.
  //
  // The special InfoType name "ALL_BASIC" triggers all detectors, but what
  // that covers may change over time as new InfoTypes are added. For precise
  // control and predictability over which detectors run, specify the
  // individual InfoTypes listed in the reference.
  repeated InfoType info_types = 1;

  // Only findings at or above this likelihood threshold are returned. The
  // default is POSSIBLE.
  // Learn more at https://cloud.google.com/dlp/docs/likelihood.
  Likelihood min_likelihood = 2;

  // Limits on the number of findings returned; see FindingLimits.
  FindingLimits limits = 3;

  // When true, the response includes a contextual quote from the data that
  // triggered a finding; see Finding.quote.
  bool include_quote = 4;

  // When true, type information is excluded from the findings.
  bool exclude_info_types = 5;

  // CustomInfoTypes supplied by the user. Learn more at
  // https://cloud.google.com/dlp/docs/creating-custom-infotypes.
  repeated CustomInfoType custom_info_types = 6;

  // Options defining which data content to scan.
  // If empty, text, images, and other content are included.
  repeated ContentOption content_options = 8;

  // Rules applied to the findings for this InspectConfig.
  // Exclusion rules contained in the set are executed at the end; other rules
  // are executed in the order in which they are specified for each info type.
  repeated InspectionRuleSet rule_set = 10;
}
// Holds bytes that are to be inspected or redacted.
message ByteContentItem {
  // Types of data that the `data` bytes may hold.
  enum BytesType {
    BYTES_TYPE_UNSPECIFIED = 0;
    IMAGE = 6;
    IMAGE_JPEG = 1;
    IMAGE_BMP = 2;
    IMAGE_PNG = 3;
    IMAGE_SVG = 4;
    TEXT_UTF8 = 5;
  }

  // Type of the data stored in the bytes string. The default is TEXT_UTF8.
  BytesType type = 1;

  // The content data to inspect or redact.
  bytes data = 2;
}
// Wraps the content that is to be inspected.
message ContentItem {
  // The item's data: a UTF-8 string, a table, or a byte array.
  oneof data_item {
    // String data to inspect or redact.
    string value = 3;

    // Structured content for inspection. Learn more at
    // https://cloud.google.com/dlp/docs/inspecting-text#inspecting_a_table.
    Table table = 4;

    // Content data to inspect or redact. Replaces `type` and `data`.
    ByteContentItem byte_item = 5;
  }
}
// Structured content to inspect. A request may carry up to 50,000 `Value`s.
// Learn more at
// https://cloud.google.com/dlp/docs/inspecting-text#inspecting_a_table.
message Table {
  // One row of the table.
  message Row {
    // The values held by this row.
    repeated Value values = 1;
  }

  // The table's headers, given as field ids.
  repeated FieldId headers = 1;

  // The table's rows.
  repeated Row rows = 2;
}
// Every finding produced for a single scanned item.
message InspectResult {
  // The findings for the item.
  repeated Finding findings = 1;

  // If true, the item might have more findings than were returned, and the
  // returned findings are an arbitrary subset of all findings. The list might
  // be truncated because the input items were too large, or because the
  // server reached the maximum amount of resources allowed for a single API
  // call. For best results, divide the input into smaller batches.
  bool findings_truncated = 2;
}
// A piece of potentially sensitive content.
message Finding {
  // The content that was found. Content that is not textual may be converted
  // to a textual representation here.
  // Provided if `include_quote` is true and the finding is at most 4096 bytes
  // long. If the finding is longer than 4096 bytes, the quote may be omitted.
  string quote = 1;

  // The type of content that might have been found.
  // Provided if `InspectConfig.exclude_info_types` is false.
  InfoType info_type = 2;

  // Confidence that the `info_type` is correct.
  Likelihood likelihood = 3;

  // Where the content was found.
  Location location = 4;

  // Time at which the finding was detected.
  google.protobuf.Timestamp create_time = 6;

  // Data parsed from quotes. Populated only if include_quote was set to true
  // and a supported infoType was requested. Currently supported infoTypes:
  // DATE, DATE_OF_BIRTH and TIME.
  QuoteInfo quote_info = 7;
}
// Where a finding is located.
message Location {
  // Zero-based byte offsets that delimit the finding, relative to the
  // finding's containing element.
  // When the content is not textual, these refer to the UTF-8 encoded textual
  // representation of the content.
  // Omitted if the content is an image.
  Range byte_range = 1;

  // Unicode character offsets that delimit the finding, relative to the
  // finding's containing element.
  // Provided when the content is text.
  Range codepoint_range = 2;

  // Nested objects pointing to the precise location of the finding within the
  // file or record.
  repeated ContentLocation content_locations = 7;
}
// Kind of match, applicable to the different ways of matching: Dictionary,
// regular expression, and intersection with findings of another info type.
enum MatchingType {
  // Invalid.
  MATCHING_TYPE_UNSPECIFIED = 0;

  // Full match.
  //
  // - Dictionary: join of Dictionary results matched complete finding quote
  // - Regex: all regex matches fill a finding quote start to end
  // - Exclude info type: completely inside affecting info types findings
  MATCHING_TYPE_FULL_MATCH = 1;

  // Partial match.
  //
  // - Dictionary: at least one of the tokens in the finding matches
  // - Regex: substring of the finding matches
  // - Exclude info type: intersects with affecting info types findings
  MATCHING_TYPE_PARTIAL_MATCH = 2;

  // Inverse match.
  //
  // - Dictionary: no tokens in the finding match the dictionary
  // - Regex: finding doesn't match the regex
  // - Exclude info type: no intersection with affecting info types findings
  MATCHING_TYPE_INVERSE_MATCH = 3;
}
// Location data for the container that holds a finding.
message ContentLocation {
  // Name of the container in which the finding is located.
  // The top-level name is the source file name or table name. Names of some
  // common storage containers are formatted as follows:
  //
  // * BigQuery tables:  `<project_id>:<dataset_id>.<table_id>`
  // * Cloud Storage files: `gs://<bucket>/<path>`
  // * Datastore namespace: <namespace>
  //
  // Nested names may be absent when the embedded object has no string
  // identifier (for example, an image contained within a document).
  string container_name = 1;

  // Type of the container within the file, with the location of the finding.
  oneof location {
    // Location within a row or record of a database table.
    RecordLocation record_location = 2;

    // Location within an image's pixels.
    ImageLocation image_location = 3;

    // Location data for document files.
    DocumentLocation document_location = 5;
  }

  // Modification timestamp of the findings container, if applicable.
  // For Google Cloud Storage, holds the last file modification timestamp.
  // For a BigQuery table, holds the last_modified_time property.
  // For Datastore, not populated.
  google.protobuf.Timestamp container_timestamp = 6;

  // Version of the findings container, if available
  // ("generation" for Google Cloud Storage).
  string container_version = 7;
}
// Where a finding is located within a document.
message DocumentLocation {
  // Offset, measured from the beginning of the file, of the line on which the
  // finding is located.
  int64 file_offset = 1;
}
// Where a finding is located within a row or record.
message RecordLocation {
  // Key of the finding.
  RecordKey record_key = 1;

  // Field id of the field that contains the finding.
  FieldId field_id = 2;

  // Location within a `ContentItem.Table`.
  TableLocation table_location = 3;
}
// Where a finding is located within a table.
message TableLocation {
  // Zero-based index of the row in which the finding is located.
  int64 row_index = 1;
}
// A generic half-open interval [start, end).
message Range {
  // Index of the first character in the range (inclusive).
  int64 start = 1;

  // Index just past the last character in the range (exclusive).
  int64 end = 2;
}
// Where a finding is located within an image.
message ImageLocation {
  // Bounding boxes that locate the pixels within the image containing the
  // finding.
  repeated BoundingBox bounding_boxes = 1;
}
// A box that encompasses detected text within an image.
message BoundingBox {
  // Top coordinate of the box. (0,0) is the upper left.
  int32 top = 1;

  // Left coordinate of the box. (0,0) is the upper left.
  int32 left = 2;

  // Width of the box, in pixels.
  int32 width = 3;

  // Height of the box, in pixels.
  int32 height = 4;
}
// Request to find potentially sensitive info in an image and redact it by
// covering it with a colored rectangle.
message RedactImageRequest {
  // Determines how redaction of images should occur.
  message ImageRedactionConfig {
    // The type of information to redact from images.
    oneof target {
      // Provide only one per info_type per request. If not specified, and
      // redact_all_text is false, the DLP API redacts all text that it
      // matches against all info_types that are found, but not specified in
      // another ImageRedactionConfig.
      InfoType info_type = 1;

      // If true, all text found in the image is redacted, regardless of
      // whether it matches an info_type. Only one should be provided.
      bool redact_all_text = 2;
    }

    // Color used when redacting content from an image. Defaults to black if
    // not specified.
    Color redaction_color = 3;
  }

  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for the inspector.
  InspectConfig inspect_config = 2;

  // Configuration specifying what content to redact from images.
  repeated ImageRedactionConfig image_redaction_configs = 5;

  // Whether the response should include findings along with the redacted
  // image.
  bool include_findings = 6;

  // The content, which must be PNG, JPEG, SVG or BMP.
  ByteContentItem byte_item = 7;
}
// A color in the RGB color space.
message Color {
  // Amount of red in the color, as a value in the interval [0, 1].
  float red = 1;

  // Amount of green in the color, as a value in the interval [0, 1].
  float green = 2;

  // Amount of blue in the color, as a value in the interval [0, 1].
  float blue = 3;
}
// Outcome of redacting an image.
message RedactImageResponse {
  // The redacted image. Its type is the same as that of the original image.
  bytes redacted_image = 1;

  // If an image was being inspected and the InspectConfig's include_quote was
  // set to true, this field includes all text, if any, that was found in the
  // image.
  string extracted_text = 2;

  // The findings. Populated when include_findings in the request is true.
  InspectResult inspect_result = 3;
}
// Request to de-identify a list of items.
message DeidentifyContentRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // Configuration for de-identifying the content item.
  // Items specified here override the template referenced by the
  // deidentify_template_name argument.
  DeidentifyConfig deidentify_config = 2;

  // Configuration for the inspector.
  // Items specified here override the template referenced by the
  // inspect_template_name argument.
  InspectConfig inspect_config = 3;

  // The item to de-identify. It is treated as text.
  ContentItem item = 4;

  // Optional template to use. Configuration given directly in inspect_config
  // overrides what is set in the template. Singular fields set in this
  // request replace their corresponding fields in the template. Repeated
  // fields are appended. Singular sub-messages and groups are recursively
  // merged.
  string inspect_template_name = 5;

  // Optional template to use. Configuration given directly in
  // deidentify_config overrides what is set in the template. Singular fields
  // set in this request replace their corresponding fields in the template.
  // Repeated fields are appended. Singular sub-messages and groups are
  // recursively merged.
  string deidentify_template_name = 6;
}
// Outcome of de-identifying a ContentItem.
message DeidentifyContentResponse {
  // The item after de-identification.
  ContentItem item = 1;

  // Overview of the changes that were made to the `item`.
  TransformationOverview overview = 2;
}
// Request to re-identify an item.
message ReidentifyContentRequest {
  // The parent resource name.
  string parent = 1;

  // Configuration for re-identifying the content item.
  // This field uses the same proto message type as de-identification, but
  // here it serves to reverse a previous de-identification.
  // Re-identification is performed by examining the transformations used to
  // de-identify the items and executing the reverse. Consequently, only
  // reversible transformations may be provided here. The reversible
  // transformations are:
  //
  //  - `CryptoReplaceFfxFpeConfig`
  DeidentifyConfig reidentify_config = 2;

  // Configuration for the inspector.
  InspectConfig inspect_config = 3;

  // The item to re-identify. It is treated as text.
  ContentItem item = 4;

  // Optional template to use. Configuration given directly in
  // `inspect_config` overrides what is set in the template. Singular fields
  // set in this request replace their corresponding fields in the template.
  // Repeated fields are appended. Singular sub-messages and groups are
  // recursively merged.
  string inspect_template_name = 5;

  // Optional template to use. References an instance of `DeidentifyTemplate`.
  // Configuration given directly in `reidentify_config` or `inspect_config`
  // overrides what is set in the template. Singular fields set in this
  // request replace their corresponding fields in the template. Repeated
  // fields are appended. Singular sub-messages and groups are recursively
  // merged.
  string reidentify_template_name = 6;
}
// Outcome of re-identifying an item.
message ReidentifyContentResponse {
  // The item after re-identification.
  ContentItem item = 1;

  // Overview of the changes that were made to the `item`.
  TransformationOverview overview = 2;
}
// Request to search for potentially sensitive info in a ContentItem.
message InspectContentRequest {
// The parent resource name, for example projects/my-project-id.
string parent = 1;
// Configuration for the inspector. What is specified here will override
// the template referenced by the `inspect_template_name` argument.
InspectConfig inspect_config = 2;
// The item to inspect.
ContentItem item = 3;
// Optional template to use. Any configuration directly specified in
// `inspect_config` will override those set in the template. Singular fields
// that are set in this request will replace their corresponding fields in the
// template. Repeated fields are appended. Singular sub-messages and groups
// are recursively merged.
string inspect_template_name = 4;
}
// Results of inspecting an item.
message InspectContentResponse {
// The findings produced by inspecting the item.
InspectResult result = 1;
}
// Cloud repository for storing output.
message OutputStorageConfig {
// Predefined schemas for storing findings.
enum OutputSchema {
// No schema selected. See `output_schema` for how an unspecified schema is
// handled.
OUTPUT_SCHEMA_UNSPECIFIED = 0;
// Basic schema including only `info_type`, `quote`, `certainty`, and
// `timestamp`.
BASIC_COLUMNS = 1;
// Schema tailored to findings from scanning Google Cloud Storage.
GCS_COLUMNS = 2;
// Schema tailored to findings from scanning Google Datastore.
DATASTORE_COLUMNS = 3;
// Schema tailored to findings from scanning Google BigQuery.
BIG_QUERY_COLUMNS = 4;
// Schema containing all columns.
ALL_COLUMNS = 5;
}
// Where the output is stored.
oneof type {
// Store findings in an existing table or a new table in an existing
// dataset. If table_id is not set, a new one will be generated
// for you with the following format:
// dlp_googleapis_yyyy_mm_dd_[dlp_job_id]. Pacific timezone will be used for
// generating the date details.
//
// For Inspect, each column in an existing output table must have the same
// name, type, and mode of a field in the `Finding` object.
//
// For Risk, an existing output table should be the output of a previous
// Risk analysis job run on the same source table, with the same privacy
// metric and quasi-identifiers. Risk jobs that analyze the same table but
// compute a different privacy metric, or use different sets of
// quasi-identifiers, cannot store their results in the same table.
BigQueryTable table = 1;
}
// Schema used for writing the findings for Inspect jobs. This field is only
// used for Inspect and must be unspecified for Risk jobs. Columns are derived
// from the `Finding` object. If appending to an existing table, any columns
// from the predefined schema that are missing will be added. No columns in
// the existing table will be deleted.
//
// If unspecified, then all available columns will be used for a new table or
// an (existing) table with no schema, and no changes will be made to an
// existing table that has a schema.
OutputSchema output_schema = 3;
}
// Statistics regarding a specific InfoType.
message InfoTypeStats {
// The infoType that this statistic is for.
InfoType info_type = 1;
// Number of findings for this infoType.
int64 count = 2;
}
// The results of an inspect DataSource job.
message InspectDataSourceDetails {
// The options the job was run with.
message RequestedOptions {
// If run with an InspectTemplate, a snapshot of its state at the time of
// this run.
InspectTemplate snapshot_inspect_template = 1;
// The inspect job configuration.
// NOTE(review): presumably the configuration the job was created with;
// confirm against the service behavior.
InspectJobConfig job_config = 3;
}
// Summary statistics for the outcome of the inspect job.
message Result {
// Total size in bytes that were processed.
int64 processed_bytes = 1;
// Estimate of the number of bytes to process.
int64 total_estimated_bytes = 2;
// Statistics of how many instances of each info type were found during
// the inspect job.
repeated InfoTypeStats info_type_stats = 3;
}
// The configuration used for this job.
RequestedOptions requested_options = 2;
// A summary of the outcome of this inspect job.
Result result = 3;
}
// InfoType description.
message InfoTypeDescription {
// Internal name of the infoType.
string name = 1;
// Human readable form of the infoType name.
string display_name = 2;
// Which parts of the API support this infoType.
repeated InfoTypeSupportedBy supported_by = 3;
// Description of the infoType. Translated when language is provided in the
// request.
string description = 4;
}
// Request for the list of infoTypes.
message ListInfoTypesRequest {
// Optional BCP-47 language code for localized infoType friendly
// names. If omitted, or if localized strings are not available,
// en-US strings will be returned.
string language_code = 1;
// Optional filter to only return infoTypes supported by certain parts of the
// API. Defaults to `supported_by=INSPECT`.
string filter = 2;
}
// Response to the ListInfoTypes request.
message ListInfoTypesResponse {
// Set of sensitive infoTypes, one description per infoType.
repeated InfoTypeDescription info_types = 1;
}
// Configuration for a risk analysis job. See
// https://cloud.google.com/dlp/docs/concepts-risk-analysis to learn more.
message RiskAnalysisJobConfig {
// Privacy metric to compute.
PrivacyMetric privacy_metric = 1;
// Input dataset to compute metrics over.
BigQueryTable source_table = 2;
// Actions to execute at the completion of the job. They are executed in the
// order provided.
repeated Action actions = 3;
}
// A column with a semantic tag attached.
message QuasiId {
// Identifies the column. [required]
FieldId field = 1;
// Semantic tag that identifies what a column contains, to determine which
// statistical model to use to estimate the reidentifiability of each
// value. [required]
oneof tag {
// A column can be tagged with an InfoType to use the relevant public
// dataset as a statistical model of population, if available. We
// currently support US ZIP codes, region codes, ages and genders.
// To programmatically obtain the list of supported InfoTypes, use
// ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
InfoType info_type = 2;
// A column can be tagged with a custom tag. In this case, the user must
// indicate an auxiliary table that contains statistical information on
// the possible values of this column (below).
string custom_tag = 3;
// If no semantic tag is indicated, we infer the statistical model from
// the distribution of values in the input data.
google.protobuf.Empty inferred = 4;
}
}
// An auxiliary table containing statistical information on the relative
// frequency of different quasi-identifier values. It has one or several
// quasi-identifier columns, and one column that indicates the relative
// frequency of each quasi-identifier tuple.
// If a tuple is present in the data but not in the auxiliary table, the
// corresponding relative frequency is assumed to be zero (and thus, the
// tuple is highly reidentifiable).
message StatisticalTable {
// A quasi-identifier column has a custom_tag, used to know which column
// in the data corresponds to which column in the statistical model.
message QuasiIdentifierField {
// Identifies the column in the auxiliary table.
FieldId field = 1;
// Custom tag linking this column to the quasi-identifier column in the
// data that carries the same tag.
string custom_tag = 2;
}
// Auxiliary table location. [required]
BigQueryTable table = 3;
// Quasi-identifier columns. [required]
repeated QuasiIdentifierField quasi_ids = 1;
// The relative frequency column must contain a floating-point number
// between 0 and 1 (inclusive). Null values are assumed to be zero.
// [required]
FieldId relative_frequency = 2;
}
// Privacy metric to compute for reidentification risk analysis.
message PrivacyMetric {
// Compute numerical stats over an individual column, including
// min, max, and quantiles.
message NumericalStatsConfig {
// Field to compute numerical stats on. Supported types are
// integer, float, date, datetime, timestamp, time.
FieldId field = 1;
}
// Compute categorical stats over an individual column, including
// number of distinct values and value count distribution.
message CategoricalStatsConfig {
// Field to compute categorical stats on. All column types are
// supported except for arrays and structs. However, it may be more
// informative to use NumericalStats when the field type is supported,
// depending on the data.
FieldId field = 1;
}
// k-anonymity metric, used for analysis of reidentification risk.
message KAnonymityConfig {
// Set of fields to compute k-anonymity over. When multiple fields are
// specified, they are considered a single composite key. Structs and
// repeated data types are not supported; however, nested fields are
// supported so long as they are not structs themselves or nested within
// a repeated field.
repeated FieldId quasi_ids = 1;
// Optional message indicating that multiple rows might be associated to a
// single individual. If the same entity_id is associated to multiple
// quasi-identifier tuples over distinct rows, we consider the entire
// collection of tuples as the composite quasi-identifier. This collection
// is a multiset: the order in which the different tuples appear in the
// dataset is ignored, but their frequency is taken into account.
//
// Important note: a maximum of 1000 rows can be associated to a single
// entity ID. If more rows are associated with the same entity ID, some
// might be ignored.
EntityId entity_id = 2;
}
// l-diversity metric, used for analysis of reidentification risk.
message LDiversityConfig {
// Set of quasi-identifiers indicating how equivalence classes are
// defined for the l-diversity computation. When multiple fields are
// specified, they are considered a single composite key.
repeated FieldId quasi_ids = 1;
// Sensitive field for computing the l-value.
FieldId sensitive_attribute = 2;
}
// Reidentifiability metric. This corresponds to a risk model similar to what
// is called "journalist risk" in the literature, except the attack dataset is
// statistically modeled instead of being perfectly known. This can be done
// using publicly available data (like the US Census), or using a custom
// statistical model (indicated as one or several BigQuery tables), or by
// extrapolating from the distribution of values in the input dataset.
message KMapEstimationConfig {
// A column with a semantic tag attached.
message TaggedField {
// Identifies the column. [required]
FieldId field = 1;
// Semantic tag that identifies what a column contains, to determine which
// statistical model to use to estimate the reidentifiability of each
// value. [required]
oneof tag {
// A column can be tagged with an InfoType to use the relevant public
// dataset as a statistical model of population, if available. We
// currently support US ZIP codes, region codes, ages and genders.
// To programmatically obtain the list of supported InfoTypes, use
// ListInfoTypes with the supported_by=RISK_ANALYSIS filter.
InfoType info_type = 2;
// A column can be tagged with a custom tag. In this case, the user must
// indicate an auxiliary table that contains statistical information on
// the possible values of this column (below).
string custom_tag = 3;
// If no semantic tag is indicated, we infer the statistical model from
// the distribution of values in the input data.
google.protobuf.Empty inferred = 4;
}
}
// An auxiliary table contains statistical information on the relative
// frequency of different quasi-identifier values. It has one or several
// quasi-identifier columns, and one column that indicates the relative
// frequency of each quasi-identifier tuple.
// If a tuple is present in the data but not in the auxiliary table, the
// corresponding relative frequency is assumed to be zero (and thus, the
// tuple is highly reidentifiable).
message AuxiliaryTable {
// A quasi-identifier column has a custom_tag, used to know which column
// in the data corresponds to which column in the statistical model.
message QuasiIdField {
// Identifies the column in the auxiliary table.
FieldId field = 1;
// Custom tag linking this column to the quasi-identifier column in the
// data that carries the same tag.
string custom_tag = 2;
}
// Auxiliary table location. [required]
BigQueryTable table = 3;
// Quasi-identifier columns. [required]
repeated QuasiIdField quasi_ids = 1;
// The relative frequency column must contain a floating-point number
// between 0 and 1 (inclusive). Null values are assumed to be zero.
// [required]
FieldId relative_frequency = 2;
}
// Fields considered to be quasi-identifiers. No two columns can have the
// same tag. [required]
repeated TaggedField quasi_ids = 1;
// ISO 3166-1 alpha-2 region code to use in the statistical modeling.
// Required if no column is tagged with a region-specific InfoType (like
// US_ZIP_5) or a region code.
string region_code = 2;
// Several auxiliary tables can be used in the analysis. Each custom_tag
// used to tag a quasi-identifier column must appear in exactly one column
// of one auxiliary table.
repeated AuxiliaryTable auxiliary_tables = 3;
}
// δ-presence metric, used to estimate how likely it is for an attacker to
// figure out that one given individual appears in a de-identified dataset.
// Similarly to the k-map metric, we cannot compute δ-presence exactly without
// knowing the attack dataset, so we use a statistical model instead.
message DeltaPresenceEstimationConfig {
// Fields considered to be quasi-identifiers. No two fields can have the
// same tag. [required]
repeated QuasiId quasi_ids = 1;
// ISO 3166-1 alpha-2 region code to use in the statistical modeling.
// Required if no column is tagged with a region-specific InfoType (like
// US_ZIP_5) or a region code.
string region_code = 2;
// Several auxiliary tables can be used in the analysis. Each custom_tag
// used to tag a quasi-identifier field must appear in exactly one
// field of one auxiliary table.
repeated StatisticalTable auxiliary_tables = 3;
}
// The metric to compute; exactly one configuration may be set.
oneof type {
// Numerical stats over a single column.
NumericalStatsConfig numerical_stats_config = 1;
// Categorical stats over a single column.
CategoricalStatsConfig categorical_stats_config = 2;
// k-anonymity metric.
KAnonymityConfig k_anonymity_config = 3;
// l-diversity metric.
LDiversityConfig l_diversity_config = 4;
// k-map (reidentifiability) estimation metric.
KMapEstimationConfig k_map_estimation_config = 5;
// δ-presence estimation metric.
DeltaPresenceEstimationConfig delta_presence_estimation_config = 6;
}
}
// Result of a risk analysis operation request.
message AnalyzeDataSourceRiskDetails {
// Result of the numerical stats computation.
message NumericalStatsResult {
// Minimum value appearing in the column.
Value min_value = 1;
// Maximum value appearing in the column.
Value max_value = 2;
// List of 99 values that partition the set of field values into 100 equal
// sized buckets.
repeated Value quantile_values = 4;
}
// Result of the categorical stats computation.
message CategoricalStatsResult {
// Histogram bucket of value frequencies in the column.
message CategoricalStatsHistogramBucket {
// Lower bound on the value frequency of the values in this bucket.
int64 value_frequency_lower_bound = 1;
// Upper bound on the value frequency of the values in this bucket.
int64 value_frequency_upper_bound = 2;
// Total number of values in this bucket.
int64 bucket_size = 3;
// Sample of value frequencies in this bucket. The total number of
// values returned per bucket is capped at 20.
repeated ValueFrequency bucket_values = 4;
// Total number of distinct values in this bucket.
int64 bucket_value_count = 5;
}
// Histogram of value frequencies in the column.
repeated CategoricalStatsHistogramBucket value_frequency_histogram_buckets =
5;
}
// Result of the k-anonymity computation.
message KAnonymityResult {
// A k-anonymity equivalence class: the set of quasi-identifier values that
// define it, together with its size.
message KAnonymityEquivalenceClass {
// Set of values defining the equivalence class. One value per
// quasi-identifier column in the original KAnonymity metric message.
// The order is always the same as the original request.
repeated Value quasi_ids_values = 1;
// Size of the equivalence class, for example number of rows with the
// above set of values.
int64 equivalence_class_size = 2;
}
// Histogram bucket of k-anonymity equivalence class sizes.
message KAnonymityHistogramBucket {
// Lower bound on the size of the equivalence classes in this bucket.
int64 equivalence_class_size_lower_bound = 1;
// Upper bound on the size of the equivalence classes in this bucket.
int64 equivalence_class_size_upper_bound = 2;
// Total number of equivalence classes in this bucket.
int64 bucket_size = 3;
// Sample of equivalence classes in this bucket. The total number of
// classes returned per bucket is capped at 20.
repeated KAnonymityEquivalenceClass bucket_values = 4;
// Total number of distinct equivalence classes in this bucket.
int64 bucket_value_count = 5;
}
// Histogram of k-anonymity equivalence classes.
repeated KAnonymityHistogramBucket equivalence_class_histogram_buckets = 5;
}
// Result of the l-diversity computation.
message LDiversityResult {
// The set of columns' values that share the same l-diversity value.
message LDiversityEquivalenceClass {
// Quasi-identifier values defining the k-anonymity equivalence
// class. The order is always the same as the original request.
repeated Value quasi_ids_values = 1;
// Size of the k-anonymity equivalence class.
int64 equivalence_class_size = 2;
// Number of distinct sensitive values in this equivalence class.
int64 num_distinct_sensitive_values = 3;
// Estimated frequencies of top sensitive values.
repeated ValueFrequency top_sensitive_values = 4;
}
// Histogram bucket of sensitive value frequencies of equivalence classes.
message LDiversityHistogramBucket {
// Lower bound on the sensitive value frequencies of the equivalence
// classes in this bucket.
int64 sensitive_value_frequency_lower_bound = 1;
// Upper bound on the sensitive value frequencies of the equivalence
// classes in this bucket.
int64 sensitive_value_frequency_upper_bound = 2;
// Total number of equivalence classes in this bucket.
int64 bucket_size = 3;
// Sample of equivalence classes in this bucket. The total number of
// classes returned per bucket is capped at 20.
repeated LDiversityEquivalenceClass bucket_values = 4;
// Total number of distinct equivalence classes in this bucket.
int64 bucket_value_count = 5;
}
// Histogram of l-diversity equivalence class sensitive value frequencies.
repeated LDiversityHistogramBucket
sensitive_value_frequency_histogram_buckets = 5;
}
// Result of the reidentifiability analysis. Note that these results are an
// estimation, not exact values.
message KMapEstimationResult {
// A tuple of values for the quasi-identifier columns.
message KMapEstimationQuasiIdValues {
// The quasi-identifier values.
repeated Value quasi_ids_values = 1;
// The estimated anonymity for these quasi-identifier values.
int64 estimated_anonymity = 2;
}
// A KMapEstimationHistogramBucket message with the following values:
//   min_anonymity: 3
//   max_anonymity: 5
//   bucket_size: 42
// means that there are 42 records whose quasi-identifier values correspond
// to 3, 4 or 5 people in the overlying population. An important particular
// case is when min_anonymity = max_anonymity = 1: the bucket_size field then
// corresponds to the number of uniquely identifiable records.
message KMapEstimationHistogramBucket {
// Always positive.
int64 min_anonymity = 1;
// Always greater than or equal to min_anonymity.
int64 max_anonymity = 2;
// Number of records within these anonymity bounds.
int64 bucket_size = 5;
// Sample of quasi-identifier tuple values in this bucket. The total
// number of classes returned per bucket is capped at 20.
repeated KMapEstimationQuasiIdValues bucket_values = 6;
// Total number of distinct quasi-identifier tuple values in this bucket.
int64 bucket_value_count = 7;
}
// The intervals [min_anonymity, max_anonymity] do not overlap. If a value
// doesn't correspond to any such interval, no record has that estimated
// anonymity. For example, the following records:
//   {min_anonymity: 1, max_anonymity: 1, bucket_size: 17}
//   {min_anonymity: 2, max_anonymity: 3, bucket_size: 42}
//   {min_anonymity: 5, max_anonymity: 10, bucket_size: 99}
// mean that there are no records with an estimated anonymity of 4, or
// larger than 10.
repeated KMapEstimationHistogramBucket k_map_estimation_histogram = 1;
}
// Result of the δ-presence computation. Note that these results are an
// estimation, not exact values.
message DeltaPresenceEstimationResult {
// A tuple of values for the quasi-identifier columns.
message DeltaPresenceEstimationQuasiIdValues {
// The quasi-identifier values.
repeated Value quasi_ids_values = 1;
// The estimated probability that a given individual sharing these
// quasi-identifier values is in the dataset. This value, typically called
// δ, is the ratio between the number of records in the dataset with these
// quasi-identifier values, and the total number of individuals (inside
// *and* outside the dataset) with these quasi-identifier values.
// For example, if there are 15 individuals in the dataset who share the
// same quasi-identifier values, and an estimated 100 people in the entire
// population with these values, then δ is 0.15.
double estimated_probability = 2;
}
// A DeltaPresenceEstimationHistogramBucket message with the following
// values:
//   min_probability: 0.1
//   max_probability: 0.2
//   bucket_size: 42
// means that there are 42 records for which δ is in [0.1, 0.2). An
// important particular case is when min_probability = max_probability = 1:
// then, every individual who shares this quasi-identifier combination is in
// the dataset.
message DeltaPresenceEstimationHistogramBucket {
// Between 0 and 1.
double min_probability = 1;
// Always greater than or equal to min_probability.
double max_probability = 2;
// Number of records within these probability bounds.
int64 bucket_size = 5;
// Sample of quasi-identifier tuple values in this bucket. The total
// number of classes returned per bucket is capped at 20.
repeated DeltaPresenceEstimationQuasiIdValues bucket_values = 6;
// Total number of distinct quasi-identifier tuple values in this bucket.
int64 bucket_value_count = 7;
}
// The intervals [min_probability, max_probability) do not overlap. If a
// value doesn't correspond to any such interval, no record has that
// estimated probability. For example, the following records:
//   {min_probability: 0, max_probability: 0.1, bucket_size: 17}
//   {min_probability: 0.2, max_probability: 0.3, bucket_size: 42}
//   {min_probability: 0.3, max_probability: 0.4, bucket_size: 99}
// mean that there are no records with an estimated probability in
// [0.1, 0.2) nor larger than or equal to 0.4.
repeated DeltaPresenceEstimationHistogramBucket
delta_presence_estimation_histogram = 1;
}
// Privacy metric to compute.
PrivacyMetric requested_privacy_metric = 1;
// Input dataset to compute metrics over.
BigQueryTable requested_source_table = 2;
// Values associated with this metric.
oneof result {
// Result of the numerical stats computation.
NumericalStatsResult numerical_stats_result = 3;
// Result of the categorical stats computation.
CategoricalStatsResult categorical_stats_result = 4;
// Result of the k-anonymity computation.
KAnonymityResult k_anonymity_result = 5;
// Result of the l-diversity computation.
LDiversityResult l_diversity_result = 6;
// Result of the k-map (reidentifiability) estimation.
KMapEstimationResult k_map_estimation_result = 7;
// Result of the δ-presence estimation.
DeltaPresenceEstimationResult delta_presence_estimation_result = 9;
}
}
// A value of a field, including its frequency.
message ValueFrequency {
// A value contained in the field in question.
Value value = 1;
// How many times the value is contained in the field.
int64 count = 2;
}
// Set of primitive values supported by the system.
// Note that for the purposes of inspection or transformation, the number
// of bytes considered to comprise a 'Value' is based on its representation
// as a UTF-8 encoded string. For example, if 'integer_value' is set to
// 123456789, the number of bytes would be counted as 9, even though an
// int64 only holds up to 8 bytes of data.
message Value {
// The value; exactly one of the following typed variants may be set.
oneof type {
// Integer value.
int64 integer_value = 1;
// Floating-point value.
double float_value = 2;
// String value.
string string_value = 3;
// Boolean value.
bool boolean_value = 4;
// Timestamp value.
google.protobuf.Timestamp timestamp_value = 5;
// Time-of-day value.
google.type.TimeOfDay time_value = 6;
// Date value.
google.type.Date date_value = 7;
// Day-of-week value.
google.type.DayOfWeek day_of_week_value = 8;
}
}
// Message for infoType-dependent details parsed from a quote.
message QuoteInfo {
// Object representation of the quote.
oneof parsed_quote {
// The date time indicated by the quote.
DateTime date_time = 2;
}
}
// Message for a date time object,
// for example 2018-01-01 or 5th August.
message DateTime {
// Time zone of the date time object, expressed as an offset from UTC.
message TimeZone {
// Set only if the offset can be determined. Positive for time ahead of UTC.
// For example, for "UTC-9", this value is -540.
int32 offset_minutes = 1;
}
// One or more of the following must be set. All fields are optional, but
// when set must be valid date or time values.
//
// Date component.
google.type.Date date = 1;
// Day-of-week component.
google.type.DayOfWeek day_of_week = 2;
// Time-of-day component.
google.type.TimeOfDay time = 3;
// Time zone component.
TimeZone time_zone = 4;
}
// The configuration that controls how the data will change.
message DeidentifyConfig {
// The kind of transformation to apply: free-text or structured.
oneof transformation {
// Treat the dataset as free-form text and apply the same free text
// transformation everywhere.
InfoTypeTransformations info_type_transformations = 1;
// Treat the dataset as structured. Transformations can be applied to
// specific locations within structured datasets, such as transforming
// a column within a table.
RecordTransformations record_transformations = 2;
}
}
// A rule for transforming a value.
message PrimitiveTransformation {
// The transformation to apply; exactly one may be set.
oneof transformation {
// Replace each input value with a given value (see `ReplaceValueConfig`).
ReplaceValueConfig replace_config = 1;
// Redact the value (see `RedactConfig`).
RedactConfig redact_config = 2;
// Partially mask the value with a fixed character (see
// `CharacterMaskConfig`).
CharacterMaskConfig character_mask_config = 3;
// Replace the value with a surrogate using FPE in FFX mode (see
// `CryptoReplaceFfxFpeConfig`).
CryptoReplaceFfxFpeConfig crypto_replace_ffx_fpe_config = 4;
// Bucket the value using fixed-size ranges (see
// `FixedSizeBucketingConfig`).
FixedSizeBucketingConfig fixed_size_bucketing_config = 5;
// Bucket the value using user-provided ranges (see `BucketingConfig`).
BucketingConfig bucketing_config = 6;
// Replace each finding with the name of its infoType (see
// `ReplaceWithInfoTypeConfig`).
ReplaceWithInfoTypeConfig replace_with_info_type_config = 7;
// Extract or preserve a portion of a date/time value (see
// `TimePartConfig`).
TimePartConfig time_part_config = 8;
// Replace the value with a cryptographic hash (see `CryptoHashConfig`).
CryptoHashConfig crypto_hash_config = 9;
// Date shift configuration (see `DateShiftConfig`).
DateShiftConfig date_shift_config = 11;
}
}
// For use with `Date`, `Timestamp`, and `TimeOfDay`, extract or preserve a
// portion of the value.
message TimePartConfig {
// The portion of a date/time value that can be extracted. The bracketed
// range on each value is the range of results for that part.
enum TimePart {
// No part selected.
TIME_PART_UNSPECIFIED = 0;
// [0-9999]
YEAR = 1;
// [1-12]
MONTH = 2;
// [1-31]
DAY_OF_MONTH = 3;
// [1-7]
DAY_OF_WEEK = 4;
// [1-52]
WEEK_OF_YEAR = 5;
// [0-23]
HOUR_OF_DAY = 6;
}
// The part of the value to extract or preserve.
TimePart part_to_extract = 1;
}
// Pseudonymization method that generates surrogates via cryptographic hashing.
// Uses SHA-256.
// The key size must be either 32 or 64 bytes.
// Outputs a base64-encoded representation of the hashed output
// (for example, L7k0BHmF1ha5U3NfGykjro4xWi1MPVQPjhMAZbSV9mM=).
// Currently, only string and integer values can be hashed.
// See https://cloud.google.com/dlp/docs/pseudonymization to learn more.
message CryptoHashConfig {
// The key used by the hash function.
CryptoKey crypto_key = 1;
}
// Replace each input value with a given `Value`.
message ReplaceValueConfig {
// Value that each input value is replaced with.
Value new_value = 1;
}
// Replace each matching finding with the name of the info_type.
// This message has no fields; its presence alone selects the behavior.
message ReplaceWithInfoTypeConfig {}
// Redact a given value. For example, if used with an `InfoTypeTransformation`
// transforming PHONE_NUMBER, and input 'My phone number is 206-555-0123', the
// output would be 'My phone number is '.
// This message has no fields; its presence alone selects the behavior.
message RedactConfig {}
// Characters to skip when doing deidentification of a value. These will be left
// alone and skipped.
message CharsToIgnore {
// Predefined groups of characters that can be skipped.
enum CommonCharsToIgnore {
// No group selected.
COMMON_CHARS_TO_IGNORE_UNSPECIFIED = 0;
// 0-9
NUMERIC = 1;
// A-Z
ALPHA_UPPER_CASE = 2;
// a-z
ALPHA_LOWER_CASE = 3;
// US Punctuation, one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
PUNCTUATION = 4;
// Whitespace character, one of [ \t\n\x0B\f\r]
WHITESPACE = 5;
}
// The characters to skip, given either explicitly or as a predefined group.
oneof characters {
// Explicit set of characters to skip.
string characters_to_skip = 1;
// Predefined group of common characters to skip.
CommonCharsToIgnore common_characters_to_ignore = 2;
}
}
// Partially mask a string by replacing a given number of characters with a
// fixed character. Masking can start from the beginning or end of the string.
// This can be used on data of any type (numbers, longs, and so on) and when
// de-identifying structured data we'll attempt to preserve the original data's
// type. (This allows you to take a long like 123 and modify it to a string like
// **3.)
message CharacterMaskConfig {
// Character to mask the sensitive values&mdash;for example, "*" for an
// alphabetic string such as name, or "0" for a numeric string such as ZIP
// code or credit card number. String must have length 1. If not supplied, we
// will default to "*" for strings, 0 for digits.
string masking_character = 1;
// Number of characters to mask. If not set, all matching chars will be
// masked. Skipped characters do not count towards this tally.
int32 number_to_mask = 2;
// Mask characters in reverse order. For example, if `masking_character` is
// '0', `number_to_mask` is 14, and `reverse_order` is false, then
// 1234-5678-9012-3456 -> 00000000000000-3456
// If `masking_character` is '*', `number_to_mask` is 3, and `reverse_order`
// is true, then 12345 -> 12***
bool reverse_order = 3;
// When masking a string, items in this list will be skipped when replacing.
// For example, if your string is 555-555-5555 and you ask us to skip `-` and
// mask 5 chars with * we would produce ***-**5-5555 (the skipped `-` does
// not count towards the 5 masked characters).
repeated CharsToIgnore characters_to_ignore = 4;
}
// Buckets values based on fixed size ranges. The
// Bucketing transformation can provide all of this functionality,
// but requires more configuration. This message is provided as a convenience to
// the user for simple bucketing strategies.
//
// The transformed value will be a hyphenated string of
// <lower_bound>-<upper_bound>; for example, if lower_bound = 10 and
// upper_bound = 20, all values that are within this bucket will be replaced
// with "10-20".
//
// This can be used on data of type: double, long.
//
// If the bound Value type differs from the type of data
// being transformed, we will first attempt converting the type of the data to
// be transformed to match the type of the bound before comparing.
//
// See https://cloud.google.com/dlp/docs/concepts-bucketing to learn more.
message FixedSizeBucketingConfig {
// Lower bound value of buckets. All values less than `lower_bound` are
// grouped together into a single bucket; for example if `lower_bound` = 10,
// then all values less than 10 are replaced with the value "-10". [Required].
Value lower_bound = 1;
// Upper bound value of buckets. All values greater than `upper_bound` are
// grouped together into a single bucket; for example if `upper_bound` = 89,
// then all values greater than 89 are replaced with the value "89+".
// [Required].
Value upper_bound = 2;
// Size of each bucket (except for minimum and maximum buckets). So if
// `lower_bound` = 10, `upper_bound` = 89, and `bucket_size` = 10, then the
// following buckets would be used: -10, 10-20, 20-30, 30-40, 40-50, 50-60,
// 60-70, 70-80, 80-89, 89+. Precision up to 2 decimals works. [Required].
double bucket_size = 3;
}
// Generalization function that buckets values based on ranges. The ranges and
// replacement values are dynamically provided by the user for custom behavior,
// such as 1-30 -> LOW, 31-65 -> MEDIUM, 66-100 -> HIGH.
// This can be used on
// data of type: number, long, string, timestamp.
// If the bound `Value` type differs from the type of data being transformed, we
// will first attempt converting the type of the data to be transformed to match
// the type of the bound before comparing.
// See https://cloud.google.com/dlp/docs/concepts-bucketing to learn more.
message BucketingConfig {
// Bucket is represented as a range, along with replacement values.
message Bucket {
// Lower bound of the range, inclusive. Type should be the same as max if
// used.
Value min = 1;
// Upper bound of the range, exclusive; type must match min.
Value max = 2;
// Replacement value for this bucket. If not provided,
// the default behavior will be to hyphenate the min-max range.
Value replacement_value = 3;
}
// Set of buckets. Ranges must be non-overlapping.
repeated Bucket buckets = 1;
}
// Replaces an identifier with a surrogate using FPE with the FFX mode of
// operation. When used in the `ReidentifyContent` API method it performs the
// opposite function: the surrogate is reversed back into the original
// identifier.
//
// - The identifier must be encoded as ASCII.
// - For a given crypto key and context, the same identifier is always
//   replaced with the same surrogate.
// - Identifiers must be at least two characters long.
// - An identifier that is the empty string is skipped.
//
// See https://cloud.google.com/dlp/docs/pseudonymization to learn more.
message CryptoReplaceFfxFpeConfig {
  // Commonly used subsets of the alphabet that the FFX mode natively
  // supports. The algorithm selects its alphabet through the "radix", so
  // each value below corresponds to a particular radix.
  enum FfxCommonNativeAlphabet {
    // No common alphabet specified.
    FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED = 0;

    // [0-9] (radix of 10)
    NUMERIC = 1;

    // [0-9A-F] (radix of 16)
    HEXADECIMAL = 2;

    // [0-9A-Z] (radix of 36)
    UPPER_CASE_ALPHA_NUMERIC = 3;

    // [0-9A-Za-z] (radix of 62)
    ALPHA_NUMERIC = 4;
  }

  // Key used by the encryption algorithm. [required]
  CryptoKey crypto_key = 1;

  // The 'tweak': a context that may be used for higher security, since the
  // same identifier in two different contexts will not be given the same
  // surrogate. When the context is not set, a default tweak is used.
  //
  // A default tweak is also used when the context is set but either:
  //
  // 1. there is no record present when transforming a given value, or
  // 2. the field is not present when transforming a given value.
  //
  // Case (1) is expected when an `InfoTypeTransformation` is applied to
  // both structured and non-structured `ContentItem`s.
  // Currently, the referenced field may be of value type integer or string.
  //
  // The tweak is constructed as a sequence of bytes in big endian byte
  // order such that:
  //
  // - a 64 bit integer is encoded followed by a single byte of value 1
  // - a string is encoded in UTF-8 format followed by a single byte of
  //   value 2
  FieldId context = 2;

  // Selects the alphabet. At most one of the members below can be set.
  oneof alphabet {
    // One of the predefined alphabets of `FfxCommonNativeAlphabet`.
    FfxCommonNativeAlphabet common_alphabet = 4;

    // A custom alphabet. It is supported by mapping its characters to the
    // alphanumeric characters that the FFX mode natively supports; the
    // mapping happens before/after encryption/decryption.
    // Each character listed must appear only once.
    // Number of characters must be in the range [2, 62].
    // This must be encoded as ASCII.
    // The order of characters does not matter.
    string custom_alphabet = 5;

    // The native way to select the alphabet. Must be in the range [2, 62].
    int32 radix = 6;
  }

  // The custom infoType used to annotate the surrogate.
  // The annotation is applied by prefixing the surrogate with the name of
  // the custom infoType followed by the number of characters comprising the
  // surrogate, using the format:
  // info_type_name(surrogate_character_count):surrogate
  //
  // For example, if the name of custom infoType is 'MY_TOKEN_INFO_TYPE' and
  // the surrogate is 'abc', the full replacement value
  // will be: 'MY_TOKEN_INFO_TYPE(3):abc'
  //
  // The annotation identifies the surrogate when inspecting content using
  // the custom infoType
  // [`SurrogateType`](/dlp/docs/reference/rest/v2/InspectConfig#surrogatetype).
  // This facilitates reversal of the surrogate when it occurs in free text.
  //
  // For inspection to work properly, the name of this infoType must not
  // occur naturally anywhere in your data; otherwise, inspection may find a
  // surrogate that does not correspond to an actual identifier.
  // Choose the custom infoType name carefully after considering what your
  // data looks like. One way to pick a name with a high chance of yielding
  // reliable detection is to include one or more unicode characters that
  // are highly improbable to exist in your data.
  // For example, assuming your data is entered from a regular ASCII
  // keyboard, the symbol with the hex code point 29DD might be used like so:
  // ⧝MY_TOKEN_TYPE
  InfoType surrogate_info_type = 8;
}
// A data encryption key (DEK), as opposed to a key encryption key (KEK)
// stored by KMS.
// When KMS is used to wrap/unwrap DEKs, be sure to set an appropriate IAM
// policy on the KMS CryptoKey (KEK) so that an attacker cannot unwrap the
// data crypto key.
message CryptoKey {
  // Where the key comes from. At most one of the members below can be set.
  oneof source {
    // See `TransientCryptoKey`.
    TransientCryptoKey transient = 1;

    // See `UnwrappedCryptoKey`.
    UnwrappedCryptoKey unwrapped = 2;

    // See `KmsWrappedCryptoKey`.
    KmsWrappedCryptoKey kms_wrapped = 3;
  }
}
// Requests that a random data crypto key be generated.
// The key is discarded after the request finishes.
message TransientCryptoKey {
  // Name of the key. [required]
  // An arbitrary string used to tell different keys apart.
  // One unique key is generated per name: two separate
  // `TransientCryptoKey` protos share the same generated key if their names
  // are the same.
  // The name is not used in any way when the data crypto key is generated
  // (repeating the api call will result in a different key being
  // generated).
  string name = 1;
}
// A raw (unwrapped) crypto key.
// Raw keys are prone to security risks because the key can be leaked
// accidentally. Choose another type of key if possible.
message UnwrappedCryptoKey {
  // The AES 128/192/256 bit key. [required]
  bytes key = 1;
}
// Parts of the APIs which use certain infoTypes.
enum InfoTypeSupportedBy {
  // Not specified.
  ENUM_TYPE_UNSPECIFIED = 0;

  // The infoType is supported by the inspect operations.
  INSPECT = 1;

  // The infoType is supported by the risk analysis operations.
  RISK_ANALYSIS = 2;
}
// Include to use an existing data crypto key that is wrapped by KMS.
// Sending a request that performs a crypto transformation with a
// kms-wrapped crypto key requires the following IAM permission for
// authorization:
// dlp.kms.encrypt
message KmsWrappedCryptoKey {
  // The wrapped data crypto key. [required]
  bytes wrapped_key = 1;

  // Resource name of the KMS CryptoKey to use for unwrapping. [required]
  string crypto_key_name = 2;
}
// Shifts dates by a random number of days, optionally keeping the shift
// consistent for the same context.
// See https://cloud.google.com/dlp/docs/concepts-date-shifting to learn
// more.
message DateShiftConfig {
  // Range of shift in days. The actual shift is selected at random within
  // this range (inclusive ends). Negative means shift to earlier in time.
  // Must not be more than 365250 days (1000 years) each direction.
  //
  // For example, 3 means shift date to at most 3 days into the future.
  // [Required]
  int32 upper_bound_days = 1;

  // For example, -5 means shift date to at most 5 days back in the past.
  // [Required]
  int32 lower_bound_days = 2;

  // Points to the field that contains the context, for example, an entity
  // id. If set, `method` must also be set. If set, the shift is consistent
  // for the given context.
  FieldId context = 3;

  // Method for calculating the shift that takes the context into
  // consideration. If set, `context` must also be set. Can only be applied
  // to table items.
  oneof method {
    // Causes the shift to be computed from this key and the context, which
    // yields the same shift for the same context and crypto_key.
    CryptoKey crypto_key = 4;
  }
}
// A type of transformation that scans unstructured text and applies various
// `PrimitiveTransformation`s to each finding. A transformation is applied
// only to values that were identified as a specific info_type.
message InfoTypeTransformations {
  // A transformation to apply to text that is identified as a specific
  // info_type.
  message InfoTypeTransformation {
    // InfoTypes the transformation applies to. An empty list causes this
    // transformation to apply to all findings that correspond to infoTypes
    // that were requested in `InspectConfig`.
    repeated InfoType info_types = 1;

    // Primitive transformation to apply to the infoType. [required]
    PrimitiveTransformation primitive_transformation = 2;
  }

  // Transformation for each infoType. No more than one can be specified for
  // a given infoType. [required]
  repeated InfoTypeTransformation transformations = 1;
}
// The transformation to apply to the field.
message FieldTransformation {
  // Input field(s) the transformation is applied to. [required]
  repeated FieldId fields = 1;

  // The transformation is applied only if this condition evaluates to true
  // for the given `RecordCondition`. Conditions may reference fields that
  // are not used in the actual transformation. [optional]
  //
  // Example Use Cases:
  //
  // - Apply a different bucket transformation to an age column if the zip
  //   code column for the same record is within a specific range.
  // - Redact a field if the date of birth field is greater than 85.
  RecordCondition condition = 3;

  // Transformation to apply. [required]
  oneof transformation {
    // Apply the transformation to the entire field.
    PrimitiveTransformation primitive_transformation = 4;

    // Treat the contents of the field as free text and selectively
    // transform content that matches an `InfoType`.
    InfoTypeTransformations info_type_transformations = 5;
  }
}
// A type of transformation that is applied over structured data such as a
// table.
message RecordTransformations {
  // Field transformations to apply when transforming the record.
  repeated FieldTransformation field_transformations = 1;

  // Configuration defining which records get suppressed entirely. A record
  // that matches any suppression rule is omitted from the output
  // [optional].
  repeated RecordSuppression record_suppressions = 2;
}
// Configuration to suppress records whose suppression conditions evaluate
// to true.
message RecordSuppression {
  // When this condition evaluates to true, the record is evaluated to be
  // suppressed from the transformed content.
  RecordCondition condition = 1;
}
// A condition for determining whether a transformation should be applied to
// a field.
message RecordCondition {
  // A single comparison of a record field against a value.
  //
  // The types of `value` and `field` do not need to match to be considered
  // equal, but not all comparisons are possible.
  // EQUAL_TO and NOT_EQUAL_TO attempt to compare even with incompatible
  // types, but all other comparisons are invalid with incompatible types.
  // A `value` of type:
  //
  // - `string` can be compared against all other types
  // - `boolean` can only be compared against other booleans
  // - `integer` can be compared against doubles or a string if the string
  //   value can be parsed as an integer.
  // - `double` can be compared against integers or a string if the string
  //   can be parsed as a double.
  // - `Timestamp` can be compared against strings in RFC 3339 date string
  //   format.
  // - `TimeOfDay` can be compared against timestamps and strings in the
  //   format of 'HH:mm:ss'.
  //
  // If the comparison fails due to a type mismatch, a warning is given and
  // the condition evaluates to false.
  message Condition {
    // Field within the record this condition is evaluated against.
    // [required]
    FieldId field = 1;

    // Operator used to compare the field or infoType to the value.
    // [required]
    RelationalOperator operator = 3;

    // Value to compare against. [Required, except for `EXISTS` tests.]
    Value value = 4;
  }

  // A collection of conditions.
  message Conditions {
    // The conditions in this collection.
    repeated Condition conditions = 1;
  }

  // An expression, consisting of an operator and conditions.
  message Expressions {
    // Logical operators that can combine the results of conditions.
    enum LogicalOperator {
      // No operator specified.
      LOGICAL_OPERATOR_UNSPECIFIED = 0;

      // Logical AND.
      AND = 1;
    }

    // The operator to apply to the result of conditions. Default and
    // currently only supported value is `AND`.
    LogicalOperator logical_operator = 1;

    // The operands of the expression.
    oneof type {
      // The conditions the operator is applied to.
      Conditions conditions = 3;
    }
  }

  // An expression.
  Expressions expressions = 3;
}
// Overview of the modifications that occurred.
message TransformationOverview {
  // Total size, in bytes, of the data that was transformed in some way.
  int64 transformed_bytes = 2;

  // Summaries of the transformations applied to the dataset.
  repeated TransformationSummary transformation_summaries = 3;
}
// Summary of a single transformation.
// Only one of 'transformation', 'field_transformations', or
// 'record_suppress' will be set.
message TransformationSummary {
  // Tells the user how many times a particular `TransformationResultCode`
  // and error details occurred.
  message SummaryResult {
    // Number of occurrences.
    int64 count = 1;

    // The result code that was counted.
    TransformationResultCode code = 2;

    // A place for warnings or errors to show up if a transformation didn't
    // work as expected.
    string details = 3;
  }

  // Possible outcomes of transformations.
  enum TransformationResultCode {
    // No result code specified.
    TRANSFORMATION_RESULT_CODE_UNSPECIFIED = 0;

    // The transformation succeeded.
    SUCCESS = 1;

    // The transformation resulted in an error.
    ERROR = 2;
  }

  // Set if the transformation was limited to a specific InfoType.
  InfoType info_type = 1;

  // Set if the transformation was limited to a specific FieldId.
  FieldId field = 2;

  // The specific transformation these stats apply to.
  PrimitiveTransformation transformation = 3;

  // The field transformation that was applied.
  // When multiple field transformations are requested for a single field,
  // this list contains all of them; otherwise, only one is supplied.
  repeated FieldTransformation field_transformations = 5;

  // The specific suppression option these stats apply to.
  RecordSuppression record_suppress = 6;

  // Result summaries for this transformation. See `SummaryResult`.
  repeated SummaryResult results = 4;

  // Total size, in bytes, of the data that was transformed in some way.
  int64 transformed_bytes = 7;
}
// Schedule for triggeredJobs.
message Schedule {
  // The scheduling option to use.
  oneof option {
    // With this option a job is started on a regular periodic basis. For
    // example: every day (86400 seconds).
    //
    // A scheduled start time is skipped if the previous execution has not
    // ended when its scheduled time occurs.
    //
    // This value must be set to a time duration greater than or equal
    // to 1 day and can be no longer than 60 days.
    google.protobuf.Duration recurrence_period_duration = 1;
  }
}
// The inspectTemplate contains a configuration (set of types of sensitive
// data to be detected) that can be used anywhere you would otherwise
// specify InspectConfig.
// See https://cloud.google.com/dlp/docs/concepts-templates to learn more.
message InspectTemplate {
  // The template name. Output only.
  //
  // The name has one of the following formats:
  // `projects/PROJECT_ID/inspectTemplates/TEMPLATE_ID` OR
  // `organizations/ORGANIZATION_ID/inspectTemplates/TEMPLATE_ID`
  string name = 1;

  // Display name (max 256 chars).
  string display_name = 2;

  // Short description (max 256 chars).
  string description = 3;

  // The creation timestamp of an inspectTemplate, output only field.
  google.protobuf.Timestamp create_time = 4;

  // The last update timestamp of an inspectTemplate, output only field.
  google.protobuf.Timestamp update_time = 5;

  // The core content of the template. Configuration of the scanning
  // process.
  InspectConfig inspect_config = 6;
}
// The DeidentifyTemplate contains instructions on how to deidentify
// content.
// See https://cloud.google.com/dlp/docs/concepts-templates to learn more.
message DeidentifyTemplate {
  // The template name. Output only.
  //
  // The name has one of the following formats:
  // `projects/PROJECT_ID/deidentifyTemplates/TEMPLATE_ID` OR
  // `organizations/ORGANIZATION_ID/deidentifyTemplates/TEMPLATE_ID`
  string name = 1;

  // Display name (max 256 chars).
  string display_name = 2;

  // Short description (max 256 chars).
  string description = 3;

  // The creation timestamp of a deidentifyTemplate, output only field.
  google.protobuf.Timestamp create_time = 4;

  // The last update timestamp of a deidentifyTemplate, output only field.
  google.protobuf.Timestamp update_time = 5;

  // The core content of the template.
  DeidentifyConfig deidentify_config = 6;
}
// Detailed information about an error encountered during job execution or
// the results of an unsuccessful activation of the JobTrigger.
// Output only field.
message Error {
  // Details of the error.
  google.rpc.Status details = 1;

  // The times the error occurred.
  repeated google.protobuf.Timestamp timestamps = 2;
}
// Contains a configuration to make dlp api calls on a repeating basis.
// See https://cloud.google.com/dlp/docs/concepts-job-triggers to learn more.
message JobTrigger {
  // What event needs to occur for a new job to be started.
  message Trigger {
    // The kind of event.
    oneof trigger {
      // Create a job on a repeating basis based on the elapse of time.
      Schedule schedule = 1;
    }
  }

  // Whether the trigger is currently active. If PAUSED or CANCELLED, no
  // jobs will be created with this configuration. The service may
  // automatically pause triggers experiencing frequent errors. To restart a
  // job, set the status to HEALTHY after correcting user errors.
  enum Status {
    // No status specified.
    STATUS_UNSPECIFIED = 0;

    // Trigger is healthy.
    HEALTHY = 1;

    // Trigger is temporarily paused.
    PAUSED = 2;

    // Trigger is cancelled and can not be resumed.
    CANCELLED = 3;
  }

  // Unique resource name for the triggeredJob, assigned by the service when
  // the triggeredJob is created, for example
  // `projects/dlp-test-project/jobTriggers/53234423`.
  string name = 1;

  // Display name (max 100 chars)
  string display_name = 2;

  // User provided description (max 256 chars)
  string description = 3;

  // The configuration details for the specific type of job to run.
  oneof job {
    // Configuration of the inspect job to run.
    InspectJobConfig inspect_job = 4;
  }

  // A list of triggers which will be OR'ed together. Only one in the list
  // needs to trigger for a job to be started. The list may contain only
  // a single Schedule trigger and must have at least one object.
  repeated Trigger triggers = 5;

  // A stream of errors encountered when the trigger was activated. Repeated
  // errors may result in the JobTrigger automatically being paused.
  // The last 100 errors are returned. The list is cleared whenever the
  // JobTrigger is modified. Output only field.
  repeated Error errors = 6;

  // The creation timestamp of a triggeredJob, output only field.
  google.protobuf.Timestamp create_time = 7;

  // The last update timestamp of a triggeredJob, output only field.
  google.protobuf.Timestamp update_time = 8;

  // The timestamp of the last time this trigger executed, output only
  // field.
  google.protobuf.Timestamp last_run_time = 9;

  // A status for this trigger. [required]
  Status status = 10;
}
// A task to execute on the completion of a job.
// See https://cloud.google.com/dlp/docs/concepts-actions to learn more.
message Action {
  // If set, the detailed findings are persisted to the specified
  // OutputStorageConfig. Only a single instance of this action can be
  // specified.
  // Compatible with: Inspect, Risk
  message SaveFindings {
    // Where the findings are persisted.
    OutputStorageConfig output_config = 1;
  }

  // Publish the results of a DlpJob to a pub sub channel.
  // Compatible with: Inspect, Risk
  message PublishToPubSub {
    // Cloud Pub/Sub topic to send notifications to. The topic must have
    // given publishing access rights to the DLP API service account
    // executing the long running DlpJob sending the notifications.
    // Format is projects/{project}/topics/{topic}.
    string topic = 1;
  }

  // Publish the result summary of a DlpJob to the Cloud Security
  // Command Center (CSCC Alpha).
  // This action is only available for projects which are parts of
  // an organization and whitelisted for the alpha Cloud Security Command
  // Center.
  // The action publishes the count of finding instances and their info
  // types. The summary of findings is persisted in CSCC and is governed by
  // CSCC service-specific policy, see
  // https://cloud.google.com/terms/service-terms
  // Only a single instance of this action can be specified.
  // Compatible with: Inspect
  message PublishSummaryToCscc {}

  // Enable email notification to project owners and editors on job's
  // completion/failure.
  message JobNotificationEmails {}

  // The action to perform.
  oneof action {
    // Save resulting findings in a provided location.
    SaveFindings save_findings = 1;

    // Publish a notification to a pubsub topic.
    PublishToPubSub pub_sub = 2;

    // Publish summary to Cloud Security Command Center (Alpha).
    PublishSummaryToCscc publish_summary_to_cscc = 3;

    // Enable email notification to project owners and editors on job's
    // completion/failure.
    JobNotificationEmails job_notification_emails = 8;
  }
}
// Request message for CreateInspectTemplate.
message CreateInspectTemplateRequest {
  // The parent resource name, for example projects/my-project-id or
  // organizations/my-org-id.
  string parent = 1;

  // The InspectTemplate to create.
  InspectTemplate inspect_template = 2;

  // The template id. It can contain uppercase and lowercase letters,
  // numbers, and hyphens; that is, it must match the regular
  // expression: `[a-zA-Z\\d-]+`. The maximum length is 100
  // characters. Can be empty to allow the system to generate one.
  string template_id = 3;
}
// Request message for UpdateInspectTemplate.
message UpdateInspectTemplateRequest {
  // Resource name of organization and inspectTemplate to be updated, for
  // example `organizations/433245324/inspectTemplates/432452342` or
  // `projects/project-id/inspectTemplates/432452342`.
  string name = 1;

  // New InspectTemplate value.
  InspectTemplate inspect_template = 2;

  // Mask to control which fields get updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetInspectTemplate.
message GetInspectTemplateRequest {
  // Resource name of the organization and inspectTemplate to be read, for
  // example `organizations/433245324/inspectTemplates/432452342` or
  // `projects/project-id/inspectTemplates/432452342`.
  string name = 1;
}
// Request message for ListInspectTemplates.
message ListInspectTemplatesRequest {
  // The parent resource name, for example projects/my-project-id or
  // organizations/my-org-id.
  string parent = 1;

  // Optional page token to continue retrieval. Comes from a previous call
  // to `ListInspectTemplates`.
  string page_token = 2;

  // Optional size of the page, can be limited by server. If zero, the
  // server returns a page of max size 100.
  int32 page_size = 3;

  // Optional comma separated list of fields to order by, each followed by
  // an `asc` or `desc` postfix. The list is case-insensitive, the default
  // sorting order is ascending, and redundant space characters are
  // insignificant.
  //
  // Example: `name asc,update_time, create_time desc`
  //
  // Supported fields are:
  //
  // - `create_time`: corresponds to time the template was created.
  // - `update_time`: corresponds to time the template was last updated.
  // - `name`: corresponds to template's name.
  // - `display_name`: corresponds to template's display name.
  string order_by = 4;
}
// Response message for ListInspectTemplates.
message ListInspectTemplatesResponse {
  // List of inspectTemplates, up to page_size in
  // ListInspectTemplatesRequest.
  repeated InspectTemplate inspect_templates = 1;

  // If the next page is available, the page token to use in the following
  // ListInspectTemplates request.
  string next_page_token = 2;
}
// Request message for DeleteInspectTemplate.
message DeleteInspectTemplateRequest {
  // Resource name of the organization and inspectTemplate to be deleted,
  // for example `organizations/433245324/inspectTemplates/432452342` or
  // `projects/project-id/inspectTemplates/432452342`.
  string name = 1;
}
// Request message for CreateJobTrigger.
message CreateJobTriggerRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // The JobTrigger to create.
  JobTrigger job_trigger = 2;

  // The trigger id. It can contain uppercase and lowercase letters,
  // numbers, and hyphens; that is, it must match the regular
  // expression: `[a-zA-Z\\d-]+`. The maximum length is 100
  // characters. Can be empty to allow the system to generate one.
  string trigger_id = 3;
}
// Request message for ActivateJobTrigger.
message ActivateJobTriggerRequest {
  // Resource name of the trigger to activate, for example
  // `projects/dlp-test-project/jobTriggers/53234423`.
  string name = 1;
}
// Request message for UpdateJobTrigger.
message UpdateJobTriggerRequest {
  // Resource name of the project and the triggeredJob, for example
  // `projects/dlp-test-project/jobTriggers/53234423`.
  string name = 1;

  // New JobTrigger value.
  JobTrigger job_trigger = 2;

  // Mask to control which fields get updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetJobTrigger.
message GetJobTriggerRequest {
  // Resource name of the project and the triggeredJob, for example
  // `projects/dlp-test-project/jobTriggers/53234423`.
  string name = 1;
}
// Request message for CreateDlpJob. Used to initiate long running
// jobs such as calculating risk metrics or inspecting Google Cloud
// Storage.
message CreateDlpJobRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 1;

  // The configuration details for the specific type of job to run.
  oneof job {
    // Configuration of an inspect job.
    InspectJobConfig inspect_job = 2;

    // Configuration of a risk analysis job.
    RiskAnalysisJobConfig risk_job = 3;
  }

  // The job id. It can contain uppercase and lowercase letters,
  // numbers, and hyphens; that is, it must match the regular
  // expression: `[a-zA-Z\\d-]+`. The maximum length is 100
  // characters. Can be empty to allow the system to generate one.
  string job_id = 4;
}
// Request message for ListJobTriggers.
message ListJobTriggersRequest {
  // The parent resource name, for example `projects/my-project-id`.
  string parent = 1;

  // Optional page token to continue retrieval. Comes from a previous call
  // to ListJobTriggers. The `order_by` field must not change for
  // subsequent calls.
  string page_token = 2;

  // Optional size of the page, can be limited by a server.
  int32 page_size = 3;

  // Optional comma separated list of triggeredJob fields to order by, each
  // followed by an `asc` or `desc` postfix. The list is case-insensitive,
  // the default sorting order is ascending, and redundant space characters
  // are insignificant.
  //
  // Example: `name asc,update_time, create_time desc`
  //
  // Supported fields are:
  //
  // - `create_time`: corresponds to time the JobTrigger was created.
  // - `update_time`: corresponds to time the JobTrigger was last updated.
  // - `last_run_time`: corresponds to the last time the JobTrigger ran.
  // - `name`: corresponds to JobTrigger's name.
  // - `display_name`: corresponds to JobTrigger's display name.
  // - `status`: corresponds to JobTrigger's status.
  string order_by = 4;

  // Optional. Allows filtering.
  //
  // Supported syntax:
  //
  // * Filter expressions are made up of one or more restrictions.
  // * Restrictions can be combined by `AND` or `OR` logical operators. A
  //   sequence of restrictions implicitly uses `AND`.
  // * A restriction has the form of `<field> <operator> <value>`.
  // * Supported fields/values for inspect jobs:
  //     - `status` - HEALTHY|PAUSED|CANCELLED
  //     - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY
  //     - `last_run_time` - RFC 3339 formatted timestamp, surrounded by
  //       quotation marks. Nanoseconds are ignored.
  //     - `error_count` - Number of errors that have occurred while
  //       running.
  // * The operator must be `=` or `!=` for status and inspected_storage.
  //
  // Examples:
  //
  // * inspected_storage = cloud_storage AND status = HEALTHY
  // * inspected_storage = cloud_storage OR inspected_storage = bigquery
  // * inspected_storage = cloud_storage AND
  //   (status = PAUSED OR status = HEALTHY)
  // * last_run_time > "2017-12-12T00:00:00+00:00"
  //
  // The length of this field should be no more than 500 characters.
  string filter = 5;
}
// Response message for ListJobTriggers.
message ListJobTriggersResponse {
  // List of triggeredJobs, up to page_size in ListJobTriggersRequest.
  repeated JobTrigger job_triggers = 1;

  // If the next page is available, the page token to use in the following
  // ListJobTriggers request.
  string next_page_token = 2;
}
// Request message for DeleteJobTrigger.
message DeleteJobTriggerRequest {
  // Resource name of the project and the triggeredJob, for example
  // `projects/dlp-test-project/jobTriggers/53234423`.
  string name = 1;
}
// Configuration of an inspect job: the data to scan, how and what to scan
// for, and the actions to execute when the job completes.
message InspectJobConfig {
  // The data to scan.
  StorageConfig storage_config = 1;

  // How and what to scan for.
  InspectConfig inspect_config = 2;

  // If provided, the template is used as the default for all values in
  // InspectConfig. `inspect_config` is merged into the values persisted as
  // part of the template.
  string inspect_template_name = 3;

  // Actions to execute at the completion of the job. They are executed in
  // the order provided.
  repeated Action actions = 4;
}
// Combines all of the information about a DLP job.
message DlpJob {
  // Possible states of a job.
  enum JobState {
    // No state specified.
    JOB_STATE_UNSPECIFIED = 0;

    // The job has not yet started.
    PENDING = 1;

    // The job is currently running.
    RUNNING = 2;

    // The job is no longer running.
    DONE = 3;

    // The job was canceled before it could complete.
    CANCELED = 4;

    // The job had an error and did not complete.
    FAILED = 5;
  }

  // The server-assigned name.
  string name = 1;

  // The type of job.
  DlpJobType type = 2;

  // State of a job.
  JobState state = 3;

  // Results of the job.
  oneof details {
    // Results from analyzing risk of a data source.
    AnalyzeDataSourceRiskDetails risk_details = 4;

    // Results from inspecting a data source.
    InspectDataSourceDetails inspect_details = 5;
  }

  // Time when the job was created.
  google.protobuf.Timestamp create_time = 6;

  // Time when the job started.
  google.protobuf.Timestamp start_time = 7;

  // Time when the job finished.
  google.protobuf.Timestamp end_time = 8;

  // If the job was created by a job trigger, the resource name of the
  // trigger that instantiated the job.
  string job_trigger_name = 10;

  // A stream of errors encountered running the job.
  repeated Error errors = 11;
}
// The request message for [DlpJobs.GetDlpJob][].
message GetDlpJobRequest {
  // Name of the DlpJob resource to get.
  string name = 1;
}
// Operators available for comparing the value of fields.
enum RelationalOperator {
  // No operator specified.
  RELATIONAL_OPERATOR_UNSPECIFIED = 0;

  // Equal. Attempts to match even with incompatible types.
  EQUAL_TO = 1;

  // Not equal to. Attempts to match even with incompatible types.
  NOT_EQUAL_TO = 2;

  // Greater than.
  GREATER_THAN = 3;

  // Less than.
  LESS_THAN = 4;

  // Greater than or equals.
  GREATER_THAN_OR_EQUALS = 5;

  // Less than or equals.
  LESS_THAN_OR_EQUALS = 6;

  // Exists.
  EXISTS = 7;
}
// The request message for listing DLP jobs.
message ListDlpJobsRequest {
  // The parent resource name, for example projects/my-project-id.
  string parent = 4;

  // Optional. Allows filtering.
  //
  // Supported syntax:
  //
  // * Filter expressions are made up of one or more restrictions.
  // * Restrictions can be combined by `AND` or `OR` logical operators. A
  //   sequence of restrictions implicitly uses `AND`.
  // * A restriction has the form of `<field> <operator> <value>`.
  // * Supported fields/values for inspect jobs:
  //     - `state` - PENDING|RUNNING|CANCELED|FINISHED|FAILED
  //     - `inspected_storage` - DATASTORE|CLOUD_STORAGE|BIGQUERY
  //     - `trigger_name` - The resource name of the trigger that created
  //       job.
  // * Supported fields for risk analysis jobs:
  //     - `state` - RUNNING|CANCELED|FINISHED|FAILED
  // * The operator must be `=` or `!=`.
  //
  // Examples:
  //
  // * inspected_storage = cloud_storage AND state = done
  // * inspected_storage = cloud_storage OR inspected_storage = bigquery
  // * inspected_storage = cloud_storage AND
  //   (state = done OR state = canceled)
  //
  // The length of this field should be no more than 500 characters.
  string filter = 1;

  // The standard list page size.
  int32 page_size = 2;

  // The standard list page token.
  string page_token = 3;

  // The type of job. Defaults to `DlpJobType.INSPECT`
  DlpJobType type = 5;

  // Optional comma separated list of fields to order by, each followed by
  // an `asc` or `desc` postfix. The list is case-insensitive, the default
  // sorting order is ascending, and redundant space characters are
  // insignificant.
  //
  // Example: `name asc, end_time asc, create_time desc`
  //
  // Supported fields are:
  //
  // - `create_time`: corresponds to time the job was created.
  // - `end_time`: corresponds to time the job ended.
  // - `name`: corresponds to job's name.
  // - `state`: corresponds to `state`
  string order_by = 6;
}
// The response message for listing DLP jobs.
message ListDlpJobsResponse {
  // The DlpJobs that match the filter specified in the request.
  repeated DlpJob jobs = 1;

  // The standard List next-page token.
  string next_page_token = 2;
}
// The request message for canceling a DLP job.
message CancelDlpJobRequest {
  // Name of the DlpJob resource to be cancelled.
  string name = 1;
}
// The request message for deleting a DLP job.
message DeleteDlpJobRequest {
  // Name of the DlpJob resource to be deleted.
  string name = 1;
}
// Request message for CreateDeidentifyTemplate.
message CreateDeidentifyTemplateRequest {
  // The parent resource name, for example projects/my-project-id or
  // organizations/my-org-id.
  string parent = 1;

  // The DeidentifyTemplate to create.
  DeidentifyTemplate deidentify_template = 2;

  // The template id. It can contain uppercase and lowercase letters,
  // numbers, and hyphens; that is, it must match the regular
  // expression: `[a-zA-Z\\d-]+`. The maximum length is 100
  // characters. Can be empty to allow the system to generate one.
  string template_id = 3;
}
// Request message for UpdateDeidentifyTemplate.
message UpdateDeidentifyTemplateRequest {
  // Resource name of organization and deidentify template to be updated,
  // for example `organizations/433245324/deidentifyTemplates/432452342` or
  // `projects/project-id/deidentifyTemplates/432452342`.
  string name = 1;

  // New DeidentifyTemplate value.
  DeidentifyTemplate deidentify_template = 2;

  // Mask to control which fields get updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetDeidentifyTemplate.
message GetDeidentifyTemplateRequest {
  // Resource name of the organization and deidentify template to read, for
  // example `organizations/433245324/deidentifyTemplates/432452342` or
  // `projects/project-id/deidentifyTemplates/432452342`.
  string name = 1;
}
// Request message for ListDeidentifyTemplates.
message ListDeidentifyTemplatesRequest {
  // Name of the parent resource, for example `projects/my-project-id` or
  // `organizations/my-org-id`.
  string parent = 1;

  // Optional. Page token used to continue retrieval; it comes from a previous
  // call to `ListDeidentifyTemplates`.
  string page_token = 2;

  // Optional. Size of the page; it can be limited by the server. If zero, the
  // server returns a page of max size 100.
  int32 page_size = 3;

  // Optional. Comma-separated list of fields to order by, each followed by an
  // `asc` or `desc` postfix. The list is case-insensitive, the default sorting
  // order is ascending, and redundant space characters are insignificant.
  //
  // Example: `name asc,update_time, create_time desc`
  //
  // Supported fields are:
  //
  // - `create_time`: the time the template was created.
  // - `update_time`: the time the template was last updated.
  // - `name`: the template's name.
  // - `display_name`: the template's display name.
  string order_by = 4;
}
// Response message for ListDeidentifyTemplates.
message ListDeidentifyTemplatesResponse {
  // Deidentify templates, up to the page_size given in
  // ListDeidentifyTemplatesRequest.
  repeated DeidentifyTemplate deidentify_templates = 1;

  // If a next page is available, the page token to use in the following
  // ListDeidentifyTemplates request.
  string next_page_token = 2;
}
// Request message for DeleteDeidentifyTemplate.
message DeleteDeidentifyTemplateRequest {
  // Resource name of the organization and deidentify template to delete, for
  // example `organizations/433245324/deidentifyTemplates/432452342` or
  // `projects/project-id/deidentifyTemplates/432452342`.
  string name = 1;
}
// Configuration for a custom dictionary built from a data source of any size
// up to the maximum defined on the
// [limits](https://cloud.google.com/dlp/limits) page. The artifacts produced
// by dictionary creation are stored in the specified Google Cloud Storage
// location. For smaller dictionaries that satisfy the size requirements,
// consider using `CustomInfoType.Dictionary` instead.
message LargeCustomDictionaryConfig {
  // Google Cloud Storage location in which to store dictionary artifacts.
  // These files will only be accessible by project owners and the DLP API. If
  // any of these artifacts are modified, the dictionary is considered invalid
  // and can no longer be used.
  CloudStoragePath output_path = 1;

  oneof source {
    // Set of files containing newline-delimited lists of dictionary phrases.
    CloudStorageFileSet cloud_storage_file_set = 2;

    // Field in a BigQuery table in which each cell represents a dictionary
    // phrase.
    BigQueryField big_query_field = 3;
  }
}
// Configuration for a StoredInfoType.
message StoredInfoTypeConfig {
  // Display name of the StoredInfoType (max 256 characters).
  string display_name = 1;

  // Description of the StoredInfoType (max 256 characters).
  string description = 2;

  oneof type {
    // StoredInfoType whose findings are defined by a dictionary of phrases.
    LargeCustomDictionaryConfig large_custom_dictionary = 3;
  }
}
// A version of a StoredInfoType, including the configuration used to build
// it, its create timestamp, and its current state.
message StoredInfoTypeVersion {
  // StoredInfoType configuration.
  StoredInfoTypeConfig config = 1;

  // Create timestamp of the version. Read-only; determined by the system when
  // the version is created.
  google.protobuf.Timestamp create_time = 2;

  // State of the stored info type version. Read-only; updated by the system
  // during dictionary creation.
  StoredInfoTypeState state = 3;

  // Errors that occurred when creating this storedInfoType version, or
  // anomalies detected in the storedInfoType data that render it unusable.
  // Only the five most recent errors are displayed, with the most recent
  // error appearing first.
  // <p>For example, some of the data for stored custom dictionaries is put in
  // the user's Google Cloud Storage bucket; if this data is modified or
  // deleted by the user or another system, the dictionary becomes invalid.
  // <p>If any errors occur, fix the problem indicated by the error message
  // and use the UpdateStoredInfoType API method to create another version of
  // the storedInfoType to continue using it, reusing the same `config` if it
  // was not the source of the error.
  repeated Error errors = 4;
}
// StoredInfoType resource message, containing information about the current
// version and any pending updates.
message StoredInfoType {
  // Resource name.
  string name = 1;

  // Current version of the stored info type.
  StoredInfoTypeVersion current_version = 2;

  // Pending versions of the stored info type. Empty if no versions are
  // pending.
  repeated StoredInfoTypeVersion pending_versions = 3;
}
// Request message for CreateStoredInfoType.
message CreateStoredInfoTypeRequest {
  // Name of the parent resource, for example `projects/my-project-id` or
  // `organizations/my-org-id`.
  string parent = 1;

  // Configuration of the storedInfoType to create.
  StoredInfoTypeConfig config = 2;

  // The storedInfoType ID may contain uppercase and lowercase letters,
  // numbers, and hyphens; that is, it must match the regular expression
  // `[a-zA-Z\\d-]+`. The maximum length is 100 characters. May be left empty
  // to let the system generate one.
  string stored_info_type_id = 3;
}
// Request message for UpdateStoredInfoType.
message UpdateStoredInfoTypeRequest {
  // Resource name of the organization and storedInfoType to update, for
  // example `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;

  // Updated configuration for the storedInfoType. If not provided, a new
  // version of the storedInfoType is created with the existing
  // configuration.
  StoredInfoTypeConfig config = 2;

  // Mask that controls which fields get updated.
  google.protobuf.FieldMask update_mask = 3;
}
// Request message for GetStoredInfoType.
message GetStoredInfoTypeRequest {
  // Resource name of the organization and storedInfoType to read, for
  // example `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;
}
// Request message for ListStoredInfoTypes.
message ListStoredInfoTypesRequest {
  // Name of the parent resource, for example `projects/my-project-id` or
  // `organizations/my-org-id`.
  string parent = 1;

  // Optional. Page token used to continue retrieval; it comes from a previous
  // call to `ListStoredInfoTypes`.
  string page_token = 2;

  // Optional. Size of the page; it can be limited by the server. If zero, the
  // server returns a page of max size 100.
  int32 page_size = 3;

  // Optional. Comma-separated list of fields to order by, each followed by an
  // `asc` or `desc` postfix. The list is case-insensitive, the default sorting
  // order is ascending, and redundant space characters are insignificant.
  //
  // Example: `name asc, display_name, create_time desc`
  //
  // Supported fields are:
  //
  // - `create_time`: the time the most recent version of the resource was
  //   created.
  // - `state`: the state of the resource.
  // - `name`: the resource name.
  // - `display_name`: the info type's display name.
  string order_by = 4;
}
// Response message for ListStoredInfoTypes.
message ListStoredInfoTypesResponse {
  // StoredInfoTypes, up to the page_size given in ListStoredInfoTypesRequest.
  repeated StoredInfoType stored_info_types = 1;

  // If a next page is available, the page token to use in the following
  // ListStoredInfoTypes request.
  string next_page_token = 2;
}
// Request message for DeleteStoredInfoType.
message DeleteStoredInfoTypeRequest {
  // Resource name of the organization and storedInfoType to delete, for
  // example `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;
}
// Enumerates the kinds of DLP jobs.
enum DlpJobType {
  // Zero value; the job type is unspecified.
  DLP_JOB_TYPE_UNSPECIFIED = 0;

  // The job inspected Google Cloud for sensitive data.
  INSPECT_JOB = 1;

  // The job executed a Risk Analysis computation.
  RISK_ANALYSIS_JOB = 2;
}
// State of a StoredInfoType version.
enum StoredInfoTypeState {
  // Zero value; the state is unspecified.
  STORED_INFO_TYPE_STATE_UNSPECIFIED = 0;

  // The StoredInfoType version is being created.
  PENDING = 1;

  // The StoredInfoType version is ready for use.
  READY = 2;

  // Creation of the StoredInfoType failed. All relevant error messages are
  // returned in the `StoredInfoTypeVersion` message.
  FAILED = 3;

  // The StoredInfoType is no longer valid because artifacts stored in
  // user-controlled storage were modified. To fix an invalid StoredInfoType,
  // use the `UpdateStoredInfoType` method to create a new version.
  INVALID = 4;
}