From d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5 Mon Sep 17 00:00:00 2001 From: Google APIs Date: Wed, 29 Jul 2020 11:26:43 -0700 Subject: [PATCH] feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig` messages. This change includes the following updates: 1. There is a new `temp_bucket` field for clusters. 2. There is a new `endpoint_config` field for clusters. 3. There is a new `preemptibility` field for instance group configs. 4. There are various updates to the doc comments. PiperOrigin-RevId: 323829608 --- .../dataproc/v1/autoscaling_policies.proto | 10 ++- google/cloud/dataproc/v1/clusters.proto | 59 +++++++++++++++- google/cloud/dataproc/v1/dataproc_v1.yaml | 69 +++++++++++++++---- google/cloud/dataproc/v1/jobs.proto | 29 ++++---- google/cloud/dataproc/v1/shared.proto | 2 +- .../dataproc/v1/workflow_templates.proto | 28 +++++--- 6 files changed, 155 insertions(+), 42 deletions(-) diff --git a/google/cloud/dataproc/v1/autoscaling_policies.proto b/google/cloud/dataproc/v1/autoscaling_policies.proto index 4ce5868d..8d10a86f 100644 --- a/google/cloud/dataproc/v1/autoscaling_policies.proto +++ b/google/cloud/dataproc/v1/autoscaling_policies.proto @@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig { // Bounds: [0s, 1d]. google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED]; - // Required. Fraction of average pending memory in the last cooldown period + // Required. Fraction of average YARN pending memory in the last cooldown period // for which to add workers. A scale-up factor of 1.0 will result in scaling // up so that there is no pending memory remaining after the update (more // aggressive scaling). A scale-up factor closer to 0 will result in a smaller // magnitude of scaling up (less aggressive scaling). + // See [How autoscaling + // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works) + // for more information. // // Bounds: [0.0, 1.0]. 
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED]; - // Required. Fraction of average pending memory in the last cooldown period + // Required. Fraction of average YARN pending memory in the last cooldown period // for which to remove workers. A scale-down factor of 1 will result in // scaling down so that there is no available memory remaining after the // update (more aggressive scaling). A scale-down factor of 0 disables // removing workers, which can be beneficial for autoscaling a single job. + // See [How autoscaling + // works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works) + // for more information. // // Bounds: [0.0, 1.0]. double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED]; diff --git a/google/cloud/dataproc/v1/clusters.proto b/google/cloud/dataproc/v1/clusters.proto index c66d35d3..ccff3522 100644 --- a/google/cloud/dataproc/v1/clusters.proto +++ b/google/cloud/dataproc/v1/clusters.proto @@ -170,6 +170,17 @@ message ClusterConfig { // bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL]; + // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, + // such as Spark and MapReduce history files. + // If you do not specify a temp bucket, + // Dataproc will determine a Cloud Storage location (US, + // ASIA, or EU) for your cluster's temp bucket according to the + // Compute Engine zone where your cluster is deployed, and then create + // and manage this project-level, per-location bucket. The default bucket has + // a TTL of 90 days, but you can use any TTL (or none) if you specify a + // bucket. + string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL]; + // Optional. The shared Compute Engine config settings for // all instances in a cluster. 
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL]; @@ -216,6 +227,20 @@ message ClusterConfig { // Optional. Lifecycle setting for the cluster. LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Port/endpoint configuration for this cluster + EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL]; +} + +// Endpoint config for this cluster +message EndpointConfig { + // Output only. The map of port descriptions to URLs. Will only be populated + // if enable_http_port_access is true. + map<string, string> http_ports = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; + + // Optional. If true, enable http access to specific ports on the cluster + // from external sources. Defaults to false. + bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL]; } // Autoscaling Policy config associated with the cluster. @@ -288,7 +313,7 @@ message GceClusterConfig { bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL]; // Optional. The [Dataproc service - // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc) + // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) // (also see [VM Data Plane // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) // used by Dataproc cluster VM instances to access Google Cloud Platform @@ -332,6 +357,27 @@ message GceClusterConfig { // The config settings for Compute Engine resources in // an instance group, such as a master or worker group. message InstanceGroupConfig { + // Controls the use of + // [preemptible instances] + // (https://cloud.google.com/compute/docs/instances/preemptible) + // within the group. 
+ enum Preemptibility { + // Preemptibility is unspecified, the system will choose the + // appropriate setting for each instance group. + PREEMPTIBILITY_UNSPECIFIED = 0; + + // Instances are non-preemptible. + // + // This option is allowed for all instance groups and is the only valid + // value for Master and Worker instance groups. + NON_PREEMPTIBLE = 1; + + // Instances are preemptible. + // + // This option is allowed only for secondary worker groups. + PREEMPTIBLE = 2; + } + // Optional. The number of VM instances in the instance group. // For master instance groups, must be set to 1. int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL]; @@ -382,6 +428,15 @@ message InstanceGroupConfig { // instances. bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; + // Optional. Specifies the preemptibility of the instance group. + // + // The default value for master and worker groups is + // `NON_PREEMPTIBLE`. This default cannot be changed. + // + // The default value for secondary instances is + // `PREEMPTIBLE`. + Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL]; + // Output only. The config for Compute Engine Instance Group // Manager that manages this group. // This is only used for preemptible instance groups. @@ -608,7 +663,7 @@ message KerberosConfig { message SoftwareConfig { // Optional. The version of software inside the cluster. It must be one of the // supported [Dataproc - // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions), + // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions), // such as "1.2" (including a subminor version, such as "1.2.29"), or the // ["preview" // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions). 
diff --git a/google/cloud/dataproc/v1/dataproc_v1.yaml b/google/cloud/dataproc/v1/dataproc_v1.yaml index cb6dc5fc..c6bdfc5f 100644 --- a/google/cloud/dataproc/v1/dataproc_v1.yaml +++ b/google/cloud/dataproc/v1/dataproc_v1.yaml @@ -28,14 +28,14 @@ documentation: Sets the access control policy on the specified resource. Replaces any existing policy. - Can return Public Errors: NOT_FOUND, INVALID_ARGUMENT and - PERMISSION_DENIED + Can return `NOT_FOUND`, `INVALID_ARGUMENT`, and `PERMISSION_DENIED` + errors. - selector: google.iam.v1.IAMPolicy.TestIamPermissions description: |- Returns permissions that a caller has on the specified resource. If the resource does not exist, this will return an empty set of - permissions, not a NOT_FOUND error. + permissions, not a `NOT_FOUND` error. Note: This operation is designed to be used for building permission-aware UIs and command-line tools, not for authorization @@ -43,17 +43,62 @@ documentation: http: rules: - - selector: google.longrunning.Operations.ListOperations - get: '/v1/{name=projects/*/regions/*/operations}' - - - selector: google.longrunning.Operations.GetOperation - get: '/v1/{name=projects/*/regions/*/operations/*}' - - - selector: google.longrunning.Operations.DeleteOperation - delete: '/v1/{name=projects/*/regions/*/operations/*}' - + - selector: google.iam.v1.IAMPolicy.GetIamPolicy + post: '/v1/{resource=projects/*/regions/*/clusters/*}:getIamPolicy' + body: '*' + additional_bindings: + - post: '/v1/{resource=projects/*/regions/*/jobs/*}:getIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/operations/*}:getIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:getIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:getIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:getIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:getIamPolicy' + 
body: '*' + - selector: google.iam.v1.IAMPolicy.SetIamPolicy + post: '/v1/{resource=projects/*/regions/*/clusters/*}:setIamPolicy' + body: '*' + additional_bindings: + - post: '/v1/{resource=projects/*/regions/*/jobs/*}:setIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/operations/*}:setIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:setIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:setIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:setIamPolicy' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:setIamPolicy' + body: '*' + - selector: google.iam.v1.IAMPolicy.TestIamPermissions + post: '/v1/{resource=projects/*/regions/*/clusters/*}:testIamPermissions' + body: '*' + additional_bindings: + - post: '/v1/{resource=projects/*/regions/*/jobs/*}:testIamPermissions' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/operations/*}:testIamPermissions' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/workflowTemplates/*}:testIamPermissions' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/workflowTemplates/*}:testIamPermissions' + body: '*' + - post: '/v1/{resource=projects/*/regions/*/autoscalingPolicies/*}:testIamPermissions' + body: '*' + - post: '/v1/{resource=projects/*/locations/*/autoscalingPolicies/*}:testIamPermissions' + body: '*' - selector: google.longrunning.Operations.CancelOperation post: '/v1/{name=projects/*/regions/*/operations/*}:cancel' + - selector: google.longrunning.Operations.DeleteOperation + delete: '/v1/{name=projects/*/regions/*/operations/*}' + - selector: google.longrunning.Operations.GetOperation + get: '/v1/{name=projects/*/regions/*/operations/*}' + - selector: google.longrunning.Operations.ListOperations + get: '/v1/{name=projects/*/regions/*/operations}' authentication: rules: diff --git a/google/cloud/dataproc/v1/jobs.proto 
b/google/cloud/dataproc/v1/jobs.proto index b9026f62..065530f3 100644 --- a/google/cloud/dataproc/v1/jobs.proto +++ b/google/cloud/dataproc/v1/jobs.proto @@ -224,12 +224,12 @@ message SparkJob { // Spark driver and tasks. repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of files to be copied to the working directory of - // Spark drivers and distributed tasks. Useful for naively parallel tasks. + // Optional. HCFS URIs of files to be placed in the working directory of + // each executor. Useful for naively parallel tasks. repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of archives to be extracted in the working directory - // of Spark drivers and tasks. Supported file types: + // Optional. HCFS URIs of archives to be extracted into the working directory + // of each executor. Supported file types: // .jar, .tar, .tar.gz, .tgz, and .zip. repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; @@ -265,11 +265,12 @@ message PySparkJob { // Python driver and tasks. repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of files to be copied to the working directory of - // Python drivers and distributed tasks. Useful for naively parallel tasks. + // Optional. HCFS URIs of files to be placed in the working directory of + // each executor. Useful for naively parallel tasks. repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of archives to be extracted in the working directory of + // Optional. HCFS URIs of archives to be extracted into the working directory + // of each executor. Supported file types: // .jar, .tar, .tar.gz, .tgz, and .zip. repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL]; @@ -414,12 +415,12 @@ message SparkRJob { // occur that causes an incorrect job submission. 
repeated string args = 2 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of files to be copied to the working directory of - // R drivers and distributed tasks. Useful for naively parallel tasks. + // Optional. HCFS URIs of files to be placed in the working directory of + // each executor. Useful for naively parallel tasks. repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL]; - // Optional. HCFS URIs of archives to be extracted in the working directory of - // Spark drivers and tasks. Supported file types: + // Optional. HCFS URIs of archives to be extracted into the working directory + // of each executor. Supported file types: // .jar, .tar, .tar.gz, .tgz, and .zip. repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL]; @@ -565,9 +566,9 @@ message JobStatus { // Encapsulates the full scoping used to reference a job. message JobReference { - // Required. The ID of the Google Cloud Platform project that the job - // belongs to. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; + // Optional. The ID of the Google Cloud Platform project that the job belongs to. If + // specified, must match the request project ID. + string project_id = 1 [(google.api.field_behavior) = OPTIONAL]; // Optional. The job ID, which must be unique within the project. // diff --git a/google/cloud/dataproc/v1/shared.proto b/google/cloud/dataproc/v1/shared.proto index 99d6e776..7a1382f1 100644 --- a/google/cloud/dataproc/v1/shared.proto +++ b/google/cloud/dataproc/v1/shared.proto @@ -25,7 +25,7 @@ option java_package = "com.google.cloud.dataproc.v1"; // Cluster components that can be activated. enum Component { - // Unspecified component. + // Unspecified component. Specifying this will cause Cluster creation to fail. COMPONENT_UNSPECIFIED = 0; // The Anaconda python distribution. 
diff --git a/google/cloud/dataproc/v1/workflow_templates.proto b/google/cloud/dataproc/v1/workflow_templates.proto index d1cfcc09..04f81004 100644 --- a/google/cloud/dataproc/v1/workflow_templates.proto +++ b/google/cloud/dataproc/v1/workflow_templates.proto @@ -238,7 +238,7 @@ message WorkflowTemplate { // Required. The Directed Acyclic Graph of Jobs to submit. repeated OrderedJob jobs = 8 [(google.api.field_behavior) = REQUIRED]; - // Optional. emplate parameters whose values are substituted into the + // Optional. Template parameters whose values are substituted into the // template. Values for parameters must be provided when the template is // instantiated. repeated TemplateParameter parameters = 9 [(google.api.field_behavior) = OPTIONAL]; @@ -319,23 +319,29 @@ message OrderedJob { // Required. The job definition. oneof job_type { - HadoopJob hadoop_job = 2; + // Optional. Job is a Hadoop job. + HadoopJob hadoop_job = 2 [(google.api.field_behavior) = OPTIONAL]; - SparkJob spark_job = 3; + // Optional. Job is a Spark job. + SparkJob spark_job = 3 [(google.api.field_behavior) = OPTIONAL]; - PySparkJob pyspark_job = 4; + // Optional. Job is a PySpark job. + PySparkJob pyspark_job = 4 [(google.api.field_behavior) = OPTIONAL]; - HiveJob hive_job = 5; + // Optional. Job is a Hive job. + HiveJob hive_job = 5 [(google.api.field_behavior) = OPTIONAL]; - PigJob pig_job = 6; + // Optional. Job is a Pig job. + PigJob pig_job = 6 [(google.api.field_behavior) = OPTIONAL]; - // Spark R job - SparkRJob spark_r_job = 11; + // Optional. Job is a SparkR job. + SparkRJob spark_r_job = 11 [(google.api.field_behavior) = OPTIONAL]; - SparkSqlJob spark_sql_job = 7; + // Optional. Job is a SparkSql job. + SparkSqlJob spark_sql_job = 7 [(google.api.field_behavior) = OPTIONAL]; - // Presto job - PrestoJob presto_job = 12; + // Optional. Job is a Presto job. + PrestoJob presto_job = 12 [(google.api.field_behavior) = OPTIONAL]; } // Optional. The labels to associate with this job.