From d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5 Mon Sep 17 00:00:00 2001
From: Google APIs <noreply@google.com>
Date: Wed, 29 Jul 2020 11:26:43 -0700
Subject: [PATCH] feat: Additional fields for the `ClusterConfig` and
 `InstanceGroupConfig` messages.

This change includes the following updates:
1. There is a new `temp_bucket` field for clusters.
2. There is a new `endpoint_config` field for clusters.
3. There is a new `preemptibility` field for instance group configs.
4. There are various updates to the doc comments.

PiperOrigin-RevId: 323829608
---
 .../dataproc/v1/autoscaling_policies.proto    | 10 ++-
 google/cloud/dataproc/v1/clusters.proto       | 59 +++++++++++++++-
 google/cloud/dataproc/v1/dataproc_v1.yaml     | 69 +++++++++++++++----
 google/cloud/dataproc/v1/jobs.proto           | 29 ++++----
 google/cloud/dataproc/v1/shared.proto         |  2 +-
 .../dataproc/v1/workflow_templates.proto      | 28 +++++---
 6 files changed, 155 insertions(+), 42 deletions(-)

diff --git a/google/cloud/dataproc/v1/autoscaling_policies.proto b/google/cloud/dataproc/v1/autoscaling_policies.proto
index 4ce5868d..8d10a86f 100644
--- a/google/cloud/dataproc/v1/autoscaling_policies.proto
+++ b/google/cloud/dataproc/v1/autoscaling_policies.proto
@@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
   // Bounds: [0s, 1d].
   google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];
 
-  // Required. Fraction of average pending memory in the last cooldown period
+  // Required. Fraction of average YARN pending memory in the last cooldown period
   // for which to add workers. A scale-up factor of 1.0 will result in scaling
   // up so that there is no pending memory remaining after the update (more
   // aggressive scaling). A scale-up factor closer to 0 will result in a smaller
   // magnitude of scaling up (less aggressive scaling).
+  // See [How autoscaling
+  // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];
 
-  // Required. Fraction of average pending memory in the last cooldown period
+  // Required. Fraction of average YARN pending memory in the last cooldown period
   // for which to remove workers. A scale-down factor of 1 will result in
   // scaling down so that there is no available memory remaining after the
   // update (more aggressive scaling). A scale-down factor of 0 disables
   // removing workers, which can be beneficial for autoscaling a single job.
+  // See [How autoscaling
+  // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
+  // for more information.
   //
   // Bounds: [0.0, 1.0].
   double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
diff --git a/google/cloud/dataproc/v1/clusters.proto b/google/cloud/dataproc/v1/clusters.proto
index c66d35d3..ccff3522 100644
--- a/google/cloud/dataproc/v1/clusters.proto
+++ b/google/cloud/dataproc/v1/clusters.proto
@@ -170,6 +170,17 @@ message ClusterConfig {
   // bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
   string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];
 
+  // Optional. A Cloud Storage bucket used to store ephemeral cluster and job
+  // data, such as Spark and MapReduce history files.
+  // If you do not specify a temp bucket,
+  // Dataproc will determine a Cloud Storage location (US,
+  // ASIA, or EU) for your cluster's temp bucket according to the
+  // Compute Engine zone where your cluster is deployed, and then create
+  // and manage this project-level, per-location bucket. The default bucket has
+  // a TTL of 90 days, but you can use any TTL (or none) if you specify a
+  // bucket.
+  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];
+
   // Optional. The shared Compute Engine config settings for
   // all instances in a cluster.
   GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
@@ -216,6 +227,20 @@ message ClusterConfig {
 
   // Optional. Lifecycle setting for the cluster.
   LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];
+
+  // Optional. Port/endpoint configuration for this cluster.
+  EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
+}
+
+// Endpoint config for this cluster.
+message EndpointConfig {
+  // Output only. The map of port descriptions to URLs. Will only be populated
+  // if `enable_http_port_access` is true.
+  map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+  // Optional. If true, enable HTTP access to specific ports on the cluster
+  // from external sources. Defaults to false.
+  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Autoscaling Policy config associated with the cluster.
@@ -288,7 +313,7 @@ message GceClusterConfig {
   bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. The [Dataproc service
-  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
+  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
   // (also see [VM Data Plane
   // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
   // used by Dataproc cluster VM instances to access Google Cloud Platform
@@ -332,6 +357,27 @@ message GceClusterConfig {
 // The config settings for Compute Engine resources in
 // an instance group, such as a master or worker group.
 message InstanceGroupConfig {
+  // Controls the use of
+  // [preemptible
+  // instances](https://cloud.google.com/compute/docs/instances/preemptible)
+  // within the group.
+  enum Preemptibility {
+    // Preemptibility is unspecified; the system will choose the
+    // appropriate setting for each instance group.
+    PREEMPTIBILITY_UNSPECIFIED = 0;
+
+    // Instances are non-preemptible.
+    //
+    // This option is allowed for all instance groups and is the only valid
+    // value for master and worker instance groups.
+    NON_PREEMPTIBLE = 1;
+
+    // Instances are preemptible.
+    //
+    // This option is allowed only for secondary worker groups.
+    PREEMPTIBLE = 2;
+  }
+
   // Optional. The number of VM instances in the instance group.
   // For master instance groups, must be set to 1.
   int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
@@ -382,6 +428,15 @@ message InstanceGroupConfig {
   // instances.
   bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
 
+  // Optional. Specifies the preemptibility of the instance group.
+  //
+  // The default value for master and worker groups is
+  // `NON_PREEMPTIBLE`. This default cannot be changed.
+  //
+  // The default value for secondary instances is
+  // `PREEMPTIBLE`.
+  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
+
   // Output only. The config for Compute Engine Instance Group
   // Manager that manages this group.
   // This is only used for preemptible instance groups.
@@ -608,7 +663,7 @@ message KerberosConfig {
 message SoftwareConfig {
   // Optional. The version of software inside the cluster. It must be one of the
   // supported [Dataproc
-  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
+  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
   // such as "1.2" (including a subminor version, such as "1.2.29"), or the
   // ["preview"
   // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
diff --git a/google/cloud/dataproc/v1/dataproc_v1.yaml b/google/cloud/dataproc/v1/dataproc_v1.yaml
index cb6dc5fc..c6bdfc5f 100644
--- a/google/cloud/dataproc/v1/dataproc_v1.yaml
+++ b/google/cloud/dataproc/v1/dataproc_v1.yaml
@@ -28,14 +28,14 @@ documentation:
       Sets the access control policy on the specified resource. Replaces
       any existing policy.
 
-      Can return Public Errors: NOT_FOUND, INVALID_ARGUMENT and
-      PERMISSION_DENIED
+      Can return `NOT_FOUND`, `INVALID_ARGUMENT`, and `PERMISSION_DENIED`
+      errors.
 
   - selector: google.iam.v1.IAMPolicy.TestIamPermissions
     description: |-
       Returns permissions that a caller has on the specified resource. If the
       resource does not exist, this will return an empty set of
-      permissions, not a NOT_FOUND error.
+      permissions, not a `NOT_FOUND` error.
 
       Note: This operation is designed to be used for building
       permission-aware UIs and command-line tools, not for authorization
@@ -43,17 +43,62 @@ documentation:
 
 http:
   rules:
-  - selector: google.longrunning.Operations.ListOperations
-    get: '/v1/{name=projects/*/regions/*/operations}'
-
-  - selector: google.longrunning.Operations.GetOperation
-    get: '/v1/{name=projects/*/regions/*/operations/*}'
-
-  - selector: google.longrunning.Operations.DeleteOperation
-    delete: '/v1/{name=projects/*/regions/*/operations/*}'
-
+  - selector: google.iam.v1.IAMPolicy.GetIamPolicy
+    post: '/v1/{resource=projects/*/regions/*/clusters/*}:getIamPolicy'
+    body: '*'
+    additional_bindings:
+    - post: '/v1/{resource=projects/*/regions/*/jobs/*}:getIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/operations/*}:getIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:getIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:getIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:getIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:getIamPolicy'
+      body: '*'
+  - selector: google.iam.v1.IAMPolicy.SetIamPolicy
+    post: '/v1/{resource=projects/*/regions/*/clusters/*}:setIamPolicy'
+    body: '*'
+    additional_bindings:
+    - post: '/v1/{resource=projects/*/regions/*/jobs/*}:setIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/operations/*}:setIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:setIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:setIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:setIamPolicy'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:setIamPolicy'
+      body: '*'
+  - selector: google.iam.v1.IAMPolicy.TestIamPermissions
+    post: '/v1/{resource=projects/*/regions/*/clusters/*}:testIamPermissions'
+    body: '*'
+    additional_bindings:
+    - post: '/v1/{resource=projects/*/regions/*/jobs/*}:testIamPermissions'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/operations/*}:testIamPermissions'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:testIamPermissions'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:testIamPermissions'
+      body: '*'
+    - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:testIamPermissions'
+      body: '*'
+    - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:testIamPermissions'
+      body: '*'
   - selector: google.longrunning.Operations.CancelOperation
     post: '/v1/{name=projects/*/regions/*/operations/*}:cancel'
+  - selector: google.longrunning.Operations.DeleteOperation
+    delete: '/v1/{name=projects/*/regions/*/operations/*}'
+  - selector: google.longrunning.Operations.GetOperation
+    get: '/v1/{name=projects/*/regions/*/operations/*}'
+  - selector: google.longrunning.Operations.ListOperations
+    get: '/v1/{name=projects/*/regions/*/operations}'
 
 authentication:
   rules:
diff --git a/google/cloud/dataproc/v1/jobs.proto b/google/cloud/dataproc/v1/jobs.proto
index b9026f62..065530f3 100644
--- a/google/cloud/dataproc/v1/jobs.proto
+++ b/google/cloud/dataproc/v1/jobs.proto
@@ -224,12 +224,12 @@ message SparkJob {
   // Spark driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Spark drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of archives to be extracted in the working directory
-  // of Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
 
@@ -265,11 +265,12 @@ message PySparkJob {
   // Python driver and tasks.
   repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // Python drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of archives to be extracted in the working directory of
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
 
@@ -414,12 +415,12 @@ message SparkRJob {
   // occur that causes an incorrect job submission.
   repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of files to be copied to the working directory of
-  // R drivers and distributed tasks. Useful for naively parallel tasks.
+  // Optional. HCFS URIs of files to be placed in the working directory of
+  // each executor. Useful for naively parallel tasks.
   repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
 
-  // Optional. HCFS URIs of archives to be extracted in the working directory of
-  // Spark drivers and tasks. Supported file types:
+  // Optional. HCFS URIs of archives to be extracted into the working directory
+  // of each executor. Supported file types:
   // .jar, .tar, .tar.gz, .tgz, and .zip.
   repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
 
@@ -565,9 +566,9 @@ message JobStatus {
 
 // Encapsulates the full scoping used to reference a job.
 message JobReference {
-  // Required. The ID of the Google Cloud Platform project that the job
-  // belongs to.
-  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
+  // Optional. The ID of the Google Cloud Platform project that the job
+  // belongs to. If specified, must match the request project ID.
+  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];
 
   // Optional. The job ID, which must be unique within the project.
   //
diff --git a/google/cloud/dataproc/v1/shared.proto b/google/cloud/dataproc/v1/shared.proto
index 99d6e776..7a1382f1 100644
--- a/google/cloud/dataproc/v1/shared.proto
+++ b/google/cloud/dataproc/v1/shared.proto
@@ -25,7 +25,7 @@ option java_package = "com.google.cloud.dataproc.v1";
 
 // Cluster components that can be activated.
 enum Component {
-  // Unspecified component.
+  // Unspecified component. Specifying this will cause Cluster creation to fail.
   COMPONENT_UNSPECIFIED = 0;
 
   // The Anaconda python distribution.
diff --git a/google/cloud/dataproc/v1/workflow_templates.proto b/google/cloud/dataproc/v1/workflow_templates.proto
index d1cfcc09..04f81004 100644
--- a/google/cloud/dataproc/v1/workflow_templates.proto
+++ b/google/cloud/dataproc/v1/workflow_templates.proto
@@ -238,7 +238,7 @@ message WorkflowTemplate {
   // Required. The Directed Acyclic Graph of Jobs to submit.
   repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED];
 
-  // Optional. emplate parameters whose values are substituted into the
+  // Optional. Template parameters whose values are substituted into the
   // template. Values for parameters must be provided when the template is
   // instantiated.
   repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL];
@@ -319,23 +319,29 @@ message OrderedJob {
 
   // Required. The job definition.
   oneof job_type {
-    HadoopJob hadoop_job = 2;
+    // Optional. Job is a Hadoop job.
+    HadoopJob hadoop_job = 2 [(google.api.field_behavior) = OPTIONAL];
 
-    SparkJob spark_job = 3;
+    // Optional. Job is a Spark job.
+    SparkJob spark_job = 3 [(google.api.field_behavior) = OPTIONAL];
 
-    PySparkJob pyspark_job = 4;
+    // Optional. Job is a PySpark job.
+    PySparkJob pyspark_job = 4 [(google.api.field_behavior) = OPTIONAL];
 
-    HiveJob hive_job = 5;
+    // Optional. Job is a Hive job.
+    HiveJob hive_job = 5 [(google.api.field_behavior) = OPTIONAL];
 
-    PigJob pig_job = 6;
+    // Optional. Job is a Pig job.
+    PigJob pig_job = 6 [(google.api.field_behavior) = OPTIONAL];
 
-    // Spark R job
-    SparkRJob spark_r_job = 11;
+    // Optional. Job is a SparkR job.
+    SparkRJob spark_r_job = 11 [(google.api.field_behavior) = OPTIONAL];
 
-    SparkSqlJob spark_sql_job = 7;
+    // Optional. Job is a SparkSql job.
+    SparkSqlJob spark_sql_job = 7 [(google.api.field_behavior) = OPTIONAL];
 
-    // Presto job
-    PrestoJob presto_job = 12;
+    // Optional. Job is a Presto job.
+    PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL];
   }
 
   // Optional. The labels to associate with this job.