diff --git a/google/assistant/embedded/v1alpha2/embedded_assistant.proto b/google/assistant/embedded/v1alpha2/embedded_assistant.proto new file mode 100755 index 00000000..bdc58166 --- /dev/null +++ b/google/assistant/embedded/v1alpha2/embedded_assistant.proto @@ -0,0 +1,372 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.assistant.embedded.v1alpha2; + +import "google/api/annotations.proto"; +import "google/type/latlng.proto"; + +option go_package = "google.golang.org/genproto/googleapis/assistant/embedded/v1alpha2;embedded"; +option java_multiple_files = true; +option java_outer_classname = "AssistantProto"; +option java_package = "com.google.assistant.embedded.v1alpha2"; + + +// Service that implements the Google Assistant API. +service EmbeddedAssistant { + // Initiates or continues a conversation with the embedded Assistant Service. + // Each call performs one round-trip, sending an audio request to the service + // and receiving the audio response. Uses bidirectional streaming to receive + // results, such as the `END_OF_UTTERANCE` event, while sending audio. + // + // A conversation is one or more gRPC connections, each consisting of several + // streamed requests and responses. + // For example, the user says *Add to my shopping list* and the Assistant + // responds *What do you want to add?*. 
The sequence of streamed requests and + // responses in the first gRPC message could be: + // + // * AssistRequest.config + // * AssistRequest.audio_in + // * AssistRequest.audio_in + // * AssistRequest.audio_in + // * AssistRequest.audio_in + // * AssistResponse.event_type.END_OF_UTTERANCE + // * AssistResponse.speech_results.transcript "add to my shopping list" + // * AssistResponse.dialog_state_out.microphone_mode.DIALOG_FOLLOW_ON + // * AssistResponse.audio_out + // * AssistResponse.audio_out + // * AssistResponse.audio_out + // + // + // The user then says *bagels* and the Assistant responds + // *OK, I've added bagels to your shopping list*. This is sent as another gRPC + // connection call to the `Assist` method, again with streamed requests and + // responses, such as: + // + // * AssistRequest.config + // * AssistRequest.audio_in + // * AssistRequest.audio_in + // * AssistRequest.audio_in + // * AssistResponse.event_type.END_OF_UTTERANCE + // * AssistResponse.dialog_state_out.microphone_mode.CLOSE_MICROPHONE + // * AssistResponse.audio_out + // * AssistResponse.audio_out + // * AssistResponse.audio_out + // * AssistResponse.audio_out + // + // Although the precise order of responses is not guaranteed, sequential + // `AssistResponse.audio_out` messages will always contain sequential portions + // of audio. + rpc Assist(stream AssistRequest) returns (stream AssistResponse); +} + +// Specifies how to process the `AssistRequest` messages. +message AssistConfig { + oneof type { + // Specifies how to process the subsequent incoming audio. Required if + // [AssistRequest.audio_in][google.assistant.embedded.v1alpha2.AssistRequest.audio_in] bytes will be provided in subsequent requests. + AudioInConfig audio_in_config = 1; + + // The text input to be sent to the Assistant. This can be populated from a + // text interface if audio input is not available. + string text_query = 6; + } + + // *Required* Specifies how to format the audio that will be returned. 
+ AudioOutConfig audio_out_config = 2; + + // *Required* Represents the current dialog state. + DialogStateIn dialog_state_in = 3; + + // Device configuration that uniquely identifies a specific device. + DeviceConfig device_config = 4; +} + +// Specifies how to process the `audio_in` data that will be provided in +// subsequent requests. For recommended settings, see the Google Assistant SDK +// [best practices](https://developers.google.com/assistant/sdk/guides/service/python/best-practices/audio). +message AudioInConfig { + // Audio encoding of the data sent in the audio message. + // Audio must be one-channel (mono). The only language supported is "en-US". + enum Encoding { + // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][]. + ENCODING_UNSPECIFIED = 0; + + // Uncompressed 16-bit signed little-endian samples (Linear PCM). + // This encoding includes no header, only the raw audio bytes. + LINEAR16 = 1; + + // [`FLAC`](https://xiph.org/flac/documentation.html) (Free Lossless Audio + // Codec) is the recommended encoding because it is + // lossless--therefore recognition is not compromised--and + // requires only about half the bandwidth of `LINEAR16`. This encoding + // includes the `FLAC` stream header followed by audio data. It supports + // 16-bit and 24-bit samples, however, not all fields in `STREAMINFO` are + // supported. + FLAC = 2; + } + + // *Required* Encoding of audio data sent in all `audio_in` messages. + Encoding encoding = 1; + + // *Required* Sample rate (in Hertz) of the audio data sent in all `audio_in` + // messages. Valid values are from 16000-24000, but 16000 is optimal. + // For best results, set the sampling rate of the audio source to 16000 Hz. + // If that's not possible, use the native sample rate of the audio source + // (instead of re-sampling). + int32 sample_rate_hertz = 2; +} + +// Specifies the desired format for the server to use when it returns +// `audio_out` messages. 
+message AudioOutConfig { + // Audio encoding of the data returned in the audio message. All encodings are + // raw audio bytes with no header, except as indicated below. + enum Encoding { + // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][]. + ENCODING_UNSPECIFIED = 0; + + // Uncompressed 16-bit signed little-endian samples (Linear PCM). + LINEAR16 = 1; + + // MP3 audio encoding. The sample rate is encoded in the payload. + MP3 = 2; + + // Opus-encoded audio wrapped in an ogg container. The result will be a + // file which can be played natively on Android and in some browsers (such + // as Chrome). The quality of the encoding is considerably higher than MP3 + // while using the same bitrate. The sample rate is encoded in the payload. + OPUS_IN_OGG = 3; + } + + // *Required* The encoding of audio data to be returned in all `audio_out` + // messages. + Encoding encoding = 1; + + // *Required* The sample rate in Hertz of the audio data returned in + // `audio_out` messages. Valid values are: 16000-24000. + int32 sample_rate_hertz = 2; + + // *Required* Current volume setting of the device's audio output. + // Valid values are 1 to 100 (corresponding to 1% to 100%). + int32 volume_percentage = 3; +} + +// Provides information about the current dialog state. +message DialogStateIn { + // *Required* This field must always be set to the + // [DialogStateOut.conversation_state][google.assistant.embedded.v1alpha2.DialogStateOut.conversation_state] value that was returned in the prior + // `Assist` RPC. It should only be omitted (field not set) if there was no + // prior `Assist` RPC because this is the first `Assist` RPC made by this + // device after it was first setup and/or a factory-default reset. + bytes conversation_state = 1; + + // *Required* Language of the request in + // [IETF BCP 47 syntax](https://tools.ietf.org/html/bcp47). For example: + // "en-US". 
If you have selected a language for this `device_id` using the + // [Settings](https://developers.google.com/assistant/sdk/guides/assistant-settings) + // menu in your phone's Google Assistant app, that selection will override + // this value. + string language_code = 2; + + // *Optional* Location of the device where the query originated. + DeviceLocation device_location = 5; +} + +// The audio containing the Assistant's response to the query. Sequential chunks +// of audio data are received in sequential `AssistResponse` messages. +message AudioOut { + // *Output-only* The audio data containing the Assistant's response to the + // query. Sequential chunks of audio data are received in sequential + // `AssistResponse` messages. + bytes audio_data = 1; +} + +// The dialog state resulting from the user's query. Multiple of these messages +// may be received. +message DialogStateOut { + // Possible states of the microphone after an `Assist` RPC completes. + enum MicrophoneMode { + // No mode specified. + MICROPHONE_MODE_UNSPECIFIED = 0; + + // The service is not expecting a follow-on question from the user. + // The microphone should remain off until the user re-activates it. + CLOSE_MICROPHONE = 1; + + // The service is expecting a follow-on question from the user. The + // microphone should be re-opened when the `AudioOut` playback completes + // (by starting a new `Assist` RPC call to send the new audio). + DIALOG_FOLLOW_ON = 2; + } + + // *Output-only* Supplemental display text from the Assistant. This could be + // the same as the speech spoken in `AssistResponse.audio_out` or it could + // be some additional information which aids the user's understanding. + string supplemental_display_text = 1; + + // *Output-only* State information for the subsequent `Assist` RPC. This + // value should be saved in the client and returned in the + // [`DialogStateIn.conversation_state`](#dialogstatein) field with the next + // `Assist` RPC. 
(The client does not need to interpret or otherwise use this + // value.) This information should be saved across device reboots. However, + // this value should be cleared (not saved in the client) during a + // factory-default reset. + bytes conversation_state = 2; + + // *Output-only* Specifies the mode of the microphone after this `Assist` + // RPC is processed. + MicrophoneMode microphone_mode = 3; + + // *Output-only* Updated volume level. The value will be 0 or omitted + // (indicating no change) unless a voice command such as *Increase the volume* + // or *Set volume level 4* was recognized, in which case the value will be + // between 1 and 100 (corresponding to the new volume level of 1% to 100%). + // Typically, a client should use this volume level when playing the + // `audio_out` data, and retain this value as the current volume level and + // supply it in the `AudioOutConfig` of the next `AssistRequest`. (Some + // clients may also implement other ways to allow the current volume level to + // be changed, for example, by providing a knob that the user can turn.) + int32 volume_percentage = 4; +} + +// The top-level message sent by the client. Clients must send at least two, and +// typically numerous `AssistRequest` messages. The first message must +// contain a `config` message and must not contain `audio_in` data. All +// subsequent messages must contain `audio_in` data and must not contain a +// `config` message. +message AssistRequest { + // Exactly one of these fields must be specified in each `AssistRequest`. + oneof type { + // The `config` message provides information to the recognizer that + // specifies how to process the request. + // The first `AssistRequest` message must contain a `config` message. + AssistConfig config = 1; + + // The audio data to be recognized. Sequential chunks of audio data are sent + // in sequential `AssistRequest` messages. 
The first `AssistRequest` + // message must not contain `audio_in` data and all subsequent + // `AssistRequest` messages must contain `audio_in` data. The audio bytes + // must be encoded as specified in `AudioInConfig`. + // Audio must be sent at approximately real-time (16000 samples per second). + // An error will be returned if audio is sent significantly faster or + // slower. + bytes audio_in = 2; + } +} + +// The top-level message received by the client. A series of one or more +// `AssistResponse` messages are streamed back to the client. +message AssistResponse { + // Indicates the type of event. + enum EventType { + // No event specified. + EVENT_TYPE_UNSPECIFIED = 0; + + // This event indicates that the server has detected the end of the user's + // speech utterance and expects no additional speech. Therefore, the server + // will not process additional audio (although it may subsequently return + // additional results). The client should stop sending additional audio + // data, half-close the gRPC connection, and wait for any additional results + // until the server closes the gRPC connection. + END_OF_UTTERANCE = 1; + } + + // *Output-only* Indicates the type of event. + EventType event_type = 1; + + // *Output-only* The audio containing the Assistant's response to the query. + AudioOut audio_out = 3; + + // *Output-only* Contains the action triggered by the query with the + // appropriate payloads and semantic parsing. + DeviceAction device_action = 6; + + // *Output-only* This repeated list contains zero or more speech recognition + // results that correspond to consecutive portions of the audio currently + // being processed, starting with the portion corresponding to the earliest + // audio (and most stable portion) to the portion corresponding to the most + // recent audio. The strings can be concatenated to view the full + // in-progress response. 
When the speech recognition completes, this list + // will contain one item with `stability` of `1.0`. + repeated SpeechRecognitionResult speech_results = 2; + + // *Output-only* Contains output related to the user's query. + DialogStateOut dialog_state_out = 5; +} + +// The estimated transcription of a phrase the user has spoken. This could be +// a single segment or the full guess of the user's spoken query. +message SpeechRecognitionResult { + // *Output-only* Transcript text representing the words that the user spoke. + string transcript = 1; + + // *Output-only* An estimate of the likelihood that the Assistant will not + // change its guess about this result. Values range from 0.0 (completely + // unstable) to 1.0 (completely stable and final). The default of 0.0 is a + // sentinel value indicating `stability` was not set. + float stability = 2; +} + +// *Required* Fields that identify the device to the Assistant. +// +// See also: +// +// * [Register a Device - REST API](https://developers.google.com/assistant/sdk/reference/device-registration/register-device-manual) +// * [Device Model and Instance Schemas](https://developers.google.com/assistant/sdk/reference/device-registration/model-and-instance-schemas) +// * [Device Proto](https://developers.google.com/assistant/sdk/reference/rpc/google.assistant.devices.v1alpha2#device) +message DeviceConfig { + // *Required* Unique identifier for the device. The id length must be 128 + // characters or less. Example: DBCDW098234. This MUST match the device_id + // returned from device registration. This device_id is used to match against + // the user's registered devices to lookup the supported traits and + // capabilities of this device. This information should not change across + // device reboots. However, it should not be saved across + // factory-default resets. + string device_id = 1; + + // *Required* Unique identifier for the device model. 
The combination of + // device_model_id and device_id must have been previously associated through + // device registration. + string device_model_id = 3; +} + +// The response returned to the device if the user has triggered a Device +// Action. For example, a device which supports the query *Turn on the light* +// would receive a `DeviceAction` with a JSON payload containing the semantics +// of the request. +message DeviceAction { + // JSON containing the device command response generated from the triggered + // Device Action grammar. The format is given by the + // `action.devices.EXECUTE` intent for a given + // [trait](https://developers.google.com/assistant/sdk/reference/traits/). + string device_request_json = 1; +} + +// There are three sources of locations. They are used with this precedence: +// +// 1. This `DeviceLocation`, which is primarily used for mobile devices with +// GPS. +// 2. Location specified by the user during device setup; this is per-user, per +// device. This location is used if `DeviceLocation` is not specified. +// 3. Inferred location based on IP address. This is used only if neither of the +// above is specified. +message DeviceLocation { + oneof type { + // Latitude and longitude of device. 
+ google.type.LatLng coordinates = 1; + } +} diff --git a/google/cloud/dataproc/artman_dataproc_v1.yaml b/google/cloud/dataproc/artman_dataproc_v1.yaml new file mode 100644 index 00000000..211938f7 --- /dev/null +++ b/google/cloud/dataproc/artman_dataproc_v1.yaml @@ -0,0 +1,34 @@ +common: + api_name: dataproc + api_version: v1 + organization_name: google-cloud + proto_deps: + - name: google-common-protos + src_proto_paths: + - v1 + service_yaml: v1/dataproc.yaml + gapic_yaml: v1/dataproc_gapic.yaml +artifacts: +- name: gapic_config + type: GAPIC_CONFIG +- name: java_gapic + type: GAPIC + language: JAVA +- name: python_gapic + type: GAPIC + language: PYTHON +- name: nodejs_gapic + type: GAPIC + language: NODEJS +- name: php_gapic + type: GAPIC + language: PHP +- name: go_gapic + type: GAPIC + language: GO +- name: ruby_gapic + type: GAPIC + language: RUBY +- name: csharp_gapic + type: GAPIC + language: CSHARP diff --git a/google/cloud/dataproc/artman_dataproc_v1beta2.yaml b/google/cloud/dataproc/artman_dataproc_v1beta2.yaml new file mode 100644 index 00000000..9917355a --- /dev/null +++ b/google/cloud/dataproc/artman_dataproc_v1beta2.yaml @@ -0,0 +1,34 @@ +common: + api_name: dataproc + api_version: v1beta2 + organization_name: google-cloud + proto_deps: + - name: google-common-protos + src_proto_paths: + - v1beta2 + service_yaml: v1beta2/dataproc.yaml + gapic_yaml: v1beta2/dataproc_gapic.yaml +artifacts: +- name: gapic_config + type: GAPIC_CONFIG +- name: java_gapic + type: GAPIC + language: JAVA +- name: python_gapic + type: GAPIC + language: PYTHON +- name: nodejs_gapic + type: GAPIC + language: NODEJS +- name: php_gapic + type: GAPIC + language: PHP +- name: go_gapic + type: GAPIC + language: GO +- name: ruby_gapic + type: GAPIC + language: RUBY +- name: csharp_gapic + type: GAPIC + language: CSHARP diff --git a/google/cloud/dataproc/v1/clusters.proto b/google/cloud/dataproc/v1/clusters.proto index fc7f45ea..721ed9bc 100644 --- 
a/google/cloud/dataproc/v1/clusters.proto +++ b/google/cloud/dataproc/v1/clusters.proto @@ -1,4 +1,4 @@ -// Copyright 2016 Google Inc. +// Copyright 2017 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -68,31 +68,45 @@ service ClusterController { // Describes the identifying information, config, and status of // a cluster of Google Compute Engine instances. message Cluster { - // [Required] The Google Cloud Platform project ID that the cluster belongs to. + // Required. The Google Cloud Platform project ID that the cluster belongs to. string project_id = 1; - // [Required] The cluster name. Cluster names within a project must be + // Required. The cluster name. Cluster names within a project must be // unique. Names of deleted clusters can be reused. string cluster_name = 2; - // [Required] The cluster config. Note that Cloud Dataproc may set + // Required. The cluster config. Note that Cloud Dataproc may set // default values, and values may change when clusters are updated. ClusterConfig config = 3; - // [Output-only] Cluster status. + // Optional. The labels to associate with this cluster. + // Label **keys** must contain 1 to 63 characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // Label **values** may be empty, but, if present, must contain 1 to 63 + // characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // No more than 32 labels can be associated with a cluster. + map<string, string> labels = 8; + + // Output-only. Cluster status. ClusterStatus status = 4; - // [Output-only] The previous cluster status. + // Output-only. The previous cluster status. repeated ClusterStatus status_history = 7; - // [Output-only] A cluster UUID (Unique Universal Identifier). Cloud Dataproc + // Output-only. A cluster UUID (Unique Universal Identifier). Cloud Dataproc // generates this value when it creates the cluster. 
string cluster_uuid = 6; + + // Contains cluster daemon metrics such as HDFS and YARN stats. + // + // **Beta Feature**: This report is available for testing purposes only. It may + // be changed before final release. + ClusterMetrics metrics = 9; } // The cluster config. message ClusterConfig { - // [Optional] A Google Cloud Storage staging bucket used for sharing generated + // Optional. A Google Cloud Storage staging bucket used for sharing generated // SSH keys and config. If you do not specify a staging bucket, Cloud // Dataproc will determine an appropriate Cloud Storage location (US, // ASIA, or EU) for your cluster's staging bucket according to the Google @@ -100,28 +114,28 @@ message ClusterConfig { // and manage this project-level, per-location bucket for you. string config_bucket = 1; - // [Required] The shared Google Compute Engine config settings for + // Required. The shared Google Compute Engine config settings for // all instances in a cluster. GceClusterConfig gce_cluster_config = 8; - // [Optional] The Google Compute Engine config settings for + // Optional. The Google Compute Engine config settings for // the master instance in a cluster. InstanceGroupConfig master_config = 9; - // [Optional] The Google Compute Engine config settings for + // Optional. The Google Compute Engine config settings for // worker instances in a cluster. InstanceGroupConfig worker_config = 10; - // [Optional] The Google Compute Engine config settings for + // Optional. The Google Compute Engine config settings for // additional worker instances in a cluster. InstanceGroupConfig secondary_worker_config = 12; - // [Optional] The config settings for software inside the cluster. + // Optional. The config settings for software inside the cluster. SoftwareConfig software_config = 13; - // [Optional] Commands to execute on each node after config is + // Optional. Commands to execute on each node after config is // completed. 
By default, executables are run on master and all worker nodes. - // You can test a node's role metadata to run an executable on + // You can test a node's `role` metadata to run an executable on // a master or worker node, as shown below using `curl` (you can also use `wget`): // // ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) @@ -136,24 +150,43 @@ message ClusterConfig { // Common config settings for resources of Google Compute Engine cluster // instances, applicable to all instances in the cluster. message GceClusterConfig { - // [Required] The zone where the Google Compute Engine cluster will be located. - // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`. + // Optional. The zone where the Google Compute Engine cluster will be located. + // On a create request, it is required in the "global" region. If omitted + // in a non-global Cloud Dataproc region, the service will pick a zone in the + // corresponding Compute Engine region. On a get request, zone will + // always be present. + // + // A full URL, partial URI, or short name are valid. Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]` + // * `projects/[project_id]/zones/[zone]` + // * `us-central1-f` string zone_uri = 1; - // [Optional] The Google Compute Engine network to be used for machine + // Optional. The Google Compute Engine network to be used for machine // communications. Cannot be specified with subnetwork_uri. If neither // `network_uri` nor `subnetwork_uri` is specified, the "default" network of // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see // [Using Subnetworks](/compute/docs/subnetworks) for more information). - // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`. + // + // A full URL, partial URI, or short name are valid. 
Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` + // * `projects/[project_id]/regions/global/default` + // * `default` string network_uri = 2; - // [Optional] The Google Compute Engine subnetwork to be used for machine + // Optional. The Google Compute Engine subnetwork to be used for machine // communications. Cannot be specified with network_uri. - // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`. + // + // A full URL, partial URI, or short name are valid. Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0` + // * `projects/[project_id]/regions/us-east1/sub0` + // * `sub0` string subnetwork_uri = 6; - // [Optional] If true, all instances in the cluster will only have internal IP + // Optional. If true, all instances in the cluster will only have internal IP // addresses. By default, clusters are not restricted to internal IP addresses, // and will have ephemeral external IP addresses assigned to each instance. // This `internal_ip_only` restriction can only be enabled for subnetwork @@ -161,7 +194,19 @@ message GceClusterConfig { // accessible without external IP addresses. bool internal_ip_only = 7; - // [Optional] The URIs of service account scopes to be included in Google + // Optional. The service account of the instances. Defaults to the default + // Google Compute Engine service account. Custom service accounts need + // permissions equivalent to the following IAM roles: + // + // * roles/logging.logWriter + // * roles/storage.objectAdmin + // + // (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts + // for more information). + // Example: `[account_id]@[project_id].iam.gserviceaccount.com` + string service_account = 8; + + // Optional. The URIs of service account scopes to be included in Google // Compute Engine instances. 
The following base set of scopes is always // included: // @@ -178,7 +223,7 @@ message GceClusterConfig { repeated string service_account_scopes = 3; // The Google Compute Engine tags to add to all instances (see - // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)). + // [Tagging instances](/compute/docs/label-or-tag-resources#tags)). repeated string tags = 4; // The Google Compute Engine metadata entries to add to all instances (see @@ -186,54 +231,83 @@ message GceClusterConfig { map<string, string> metadata = 5; } -// [Optional] The config settings for Google Compute Engine resources in +// Optional. The config settings for Google Compute Engine resources in // an instance group, such as a master or worker group. message InstanceGroupConfig { - // [Required] The number of VM instances in the instance group. + // Optional. The number of VM instances in the instance group. // For master instance groups, must be set to 1. int32 num_instances = 1; - // [Optional] The list of instance names. Cloud Dataproc derives the names from + // Optional. The list of instance names. Cloud Dataproc derives the names from // `cluster_name`, `num_instances`, and the instance group if not set by user // (recommended practice is to let Cloud Dataproc derive the name). repeated string instance_names = 2; - // [Output-only] The Google Compute Engine image resource used for cluster + // Output-only. The Google Compute Engine image resource used for cluster // instances. Inferred from `SoftwareConfig.image_version`. string image_uri = 3; - // [Required] The Google Compute Engine machine type used for cluster instances. - // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`. + // Optional. The Google Compute Engine machine type used for cluster instances. + // + // A full URL, partial URI, or short name are valid. 
Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2` + // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2` + // * `n1-standard-2` string machine_type_uri = 4; - // [Optional] Disk option config settings. + // Optional. Disk option config settings. DiskConfig disk_config = 5; - // [Optional] Specifies that this instance group contains preemptible instances. + // Optional. Specifies that this instance group contains preemptible instances. bool is_preemptible = 6; - // [Output-only] The config for Google Compute Engine Instance Group + // Output-only. The config for Google Compute Engine Instance Group // Manager that manages this group. // This is only used for preemptible instance groups. ManagedGroupConfig managed_group_config = 7; + + // Optional. The Google Compute Engine accelerator configuration for these + // instances. + // + // **Beta Feature**: This feature is still under development. It may be + // changed before final release. + repeated AcceleratorConfig accelerators = 8; } // Specifies the resources used to actively manage an instance group. message ManagedGroupConfig { - // [Output-only] The name of the Instance Template used for the Managed + // Output-only. The name of the Instance Template used for the Managed // Instance Group. string instance_template_name = 1; - // [Output-only] The name of the Instance Group Manager for this group. + // Output-only. The name of the Instance Group Manager for this group. string instance_group_manager_name = 2; } +// Specifies the type and number of accelerator cards attached to the instances +// of an instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)). +message AcceleratorConfig { + // Full URL, partial URI, or short name of the accelerator type resource to + // expose to this instance. 
See [Google Compute Engine AcceleratorTypes]( + // /compute/docs/reference/beta/acceleratorTypes) + // + // Examples + // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80` + // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80` + // * `nvidia-tesla-k80` + string accelerator_type_uri = 1; + + // The number of the accelerator cards of this type exposed to this instance. + int32 accelerator_count = 2; +} + // Specifies the config of disk options for a group of VM instances. message DiskConfig { - // [Optional] Size in GB of the boot disk (default is 500GB). + // Optional. Size in GB of the boot disk (default is 500GB). int32 boot_disk_size_gb = 1; - // [Optional] Number of attached SSDs, from 0 to 4 (default is 0). + // Optional. Number of attached SSDs, from 0 to 4 (default is 0). // If SSDs are not attached, the boot disk is used to store runtime logs and // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. // If one or more SSDs are attached, this runtime bulk @@ -245,10 +319,10 @@ message DiskConfig { // Specifies an executable to run on a fully configured node and a // timeout period for executable completion. message NodeInitializationAction { - // [Required] Google Cloud Storage URI of executable file. + // Required. Google Cloud Storage URI of executable file. string executable_file = 1; - // [Optional] Amount of time executable has to complete. Default is + // Optional. Amount of time executable has to complete. Default is // 10 minutes. Cluster creation fails with an explanatory error message (the // name of the executable that caused the error and the exceeded timeout // period) if the executable is not completed at end of the timeout period. @@ -278,71 +352,109 @@ message ClusterStatus { UPDATING = 5; } - // [Output-only] The cluster's state. 
+ enum Substate { + UNSPECIFIED = 0; + + // The cluster is known to be in an unhealthy state + // (for example, critical daemons are not running or HDFS capacity is + // exhausted). + // + // Applies to RUNNING state. + UNHEALTHY = 1; + + // The agent-reported status is out of date (may occur if + // Cloud Dataproc loses communication with Agent). + // + // Applies to RUNNING state. + STALE_STATUS = 2; + } + + // Output-only. The cluster's state. State state = 1; - // [Output-only] Optional details of cluster's state. + // Output-only. Optional details of cluster's state. string detail = 2; - // [Output-only] Time when this state was entered. + // Output-only. Time when this state was entered. google.protobuf.Timestamp state_start_time = 3; + + // Output-only. Additional state information that includes + // status reported by the agent. + Substate substate = 4; } // Specifies the selection and config of software inside the cluster. message SoftwareConfig { - // [Optional] The version of software inside the cluster. It must match the + // Optional. The version of software inside the cluster. It must match the // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)). string image_version = 1; - // [Optional] The properties to set on daemon config files. + // Optional. The properties to set on daemon config files. // // Property keys are specified in `prefix:property` format, such as // `core:fs.defaultFS`. 
The following are supported prefixes // and their mappings: // + // * capacity-scheduler: `capacity-scheduler.xml` // * core: `core-site.xml` + // * distcp: `distcp-default.xml` // * hdfs: `hdfs-site.xml` - // * mapred: `mapred-site.xml` - // * yarn: `yarn-site.xml` // * hive: `hive-site.xml` + // * mapred: `mapred-site.xml` // * pig: `pig.properties` // * spark: `spark-defaults.conf` + // * yarn: `yarn-site.xml` + // + // For more information, see + // [Cluster properties](/dataproc/docs/concepts/cluster-properties). map properties = 2; } +// Contains cluster daemon metrics, such as HDFS and YARN stats. +// +// **Beta Feature**: This report is available for testing purposes only. It may +// be changed before final release. +message ClusterMetrics { + // The HDFS metrics. + map hdfs_metrics = 1; + + // The YARN metrics. + map yarn_metrics = 2; +} + // A request to create a cluster. message CreateClusterRequest { - // [Required] The ID of the Google Cloud Platform project that the cluster + // Required. The ID of the Google Cloud Platform project that the cluster // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The cluster to create. + // Required. The cluster to create. Cluster cluster = 2; } // A request to update a cluster. message UpdateClusterRequest { - // [Required] The ID of the Google Cloud Platform project the + // Required. The ID of the Google Cloud Platform project the // cluster belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 5; - // [Required] The cluster name. + // Required. The cluster name. string cluster_name = 2; - // [Required] The changes to the cluster. + // Required. The changes to the cluster. 
Cluster cluster = 3; - // [Required] Specifies the path, relative to Cluster, of + // Required. Specifies the path, relative to `Cluster`, of // the field to update. For example, to change the number of workers - // in a cluster to 5, the update_mask parameter would be - // specified as config.worker_config.num_instances, + // in a cluster to 5, the `update_mask` parameter would be + // specified as `config.worker_config.num_instances`, // and the `PATCH` request body would specify the new value, as follows: // // { @@ -352,9 +464,10 @@ message UpdateClusterRequest { // } // } // } - // Similarly, to change the number of preemptible workers in a cluster to 5, the - // update_mask parameter would be config.secondary_worker_config.num_instances, - // and the `PATCH` request body would be set as follows: + // Similarly, to change the number of preemptible workers in a cluster to 5, + // the `update_mask` parameter would be + // `config.secondary_worker_config.num_instances`, and the `PATCH` request + // body would be set as follows: // // { // "config":{ @@ -363,81 +476,121 @@ message UpdateClusterRequest { // } // } // } - // Note: Currently, config.worker_config.num_instances - // and config.secondary_worker_config.num_instances are the only - // fields that can be updated. + // Note: Currently, only the following fields can be updated: + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + // + //
MaskPurpose
labelsUpdate labels
config.worker_config.num_instancesResize primary worker group
config.secondary_worker_config.num_instancesResize secondary worker group
google.protobuf.FieldMask update_mask = 4; } // A request to delete a cluster. message DeleteClusterRequest { - // [Required] The ID of the Google Cloud Platform project that the cluster + // Required. The ID of the Google Cloud Platform project that the cluster // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The cluster name. + // Required. The cluster name. string cluster_name = 2; } // Request to get the resource representation for a cluster in a project. message GetClusterRequest { - // [Required] The ID of the Google Cloud Platform project that the cluster + // Required. The ID of the Google Cloud Platform project that the cluster // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The cluster name. + // Required. The cluster name. string cluster_name = 2; } // A request to list the clusters in a project. message ListClustersRequest { - // [Required] The ID of the Google Cloud Platform project that the cluster + // Required. The ID of the Google Cloud Platform project that the cluster // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 4; - // [Optional] The standard List page size. + // Optional. A filter constraining the clusters to list. Filters are + // case-sensitive and have the following syntax: + // + // field = value [AND [field = value]] ... + // + // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`, + // and `[KEY]` is a label key. **value** can be `*` to match all values. 
+ // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`, + // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE` + // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE` + // contains the `DELETING` and `ERROR` states. + // `clusterName` is the name of the cluster provided at creation time. + // Only the logical `AND` operator is supported; space-separated items are + // treated as having an implicit `AND` operator. + // + // Example filter: + // + // status.state = ACTIVE AND clusterName = mycluster + // AND labels.env = staging AND labels.starred = * + string filter = 5; + + // Optional. The standard List page size. int32 page_size = 2; - // [Optional] The standard List page token. + // Optional. The standard List page token. string page_token = 3; } // The list of all clusters in a project. message ListClustersResponse { - // [Output-only] The clusters in the project. + // Output-only. The clusters in the project. repeated Cluster clusters = 1; - // [Output-only] This token is included in the response if there are more + // Output-only. This token is included in the response if there are more // results to fetch. To fetch additional results, provide this value as the - // `page_token` in a subsequent ListClustersRequest. + // `page_token` in a subsequent `ListClustersRequest`. string next_page_token = 2; } // A request to collect cluster diagnostic information. message DiagnoseClusterRequest { - // [Required] The ID of the Google Cloud Platform project that the cluster + // Required. The ID of the Google Cloud Platform project that the cluster // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The cluster name. + // Required. The cluster name. string cluster_name = 2; } // The location of diagnostic output. 
message DiagnoseClusterResults { - // [Output-only] The Google Cloud Storage URI of the diagnostic output. + // Output-only. The Google Cloud Storage URI of the diagnostic output. // The output report is a plain text file with a summary of collected // diagnostics. string output_uri = 1; diff --git a/google/cloud/dataproc/v1/dataproc.yaml b/google/cloud/dataproc/v1/dataproc.yaml new file mode 100644 index 00000000..2966bad2 --- /dev/null +++ b/google/cloud/dataproc/v1/dataproc.yaml @@ -0,0 +1,38 @@ +type: google.api.Service +config_version: 2 +name: dataproc.googleapis.com +title: Google Cloud Dataproc API + +apis: +- name: google.cloud.dataproc.v1.ClusterController +- name: google.cloud.dataproc.v1.JobController + +types: +- name: google.cloud.dataproc.v1.DiagnoseClusterResults +- name: google.cloud.dataproc.v1.ClusterOperationMetadata + +documentation: + summary: 'Manages Hadoop-based clusters and jobs on Google Cloud Platform.' + +http: + rules: + - selector: google.longrunning.Operations.ListOperations + get: '/v1/{name=projects/*/regions/*/operations}' + + - selector: google.longrunning.Operations.GetOperation + get: '/v1/{name=projects/*/regions/*/operations/*}' + + - selector: google.longrunning.Operations.DeleteOperation + delete: '/v1/{name=projects/*/regions/*/operations/*}' + + - selector: google.longrunning.Operations.CancelOperation + post: '/v1/{name=projects/*/regions/*/operations/*}:cancel' + + +authentication: + rules: + - selector: '*' + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform + diff --git a/google/cloud/dataproc/v1/dataproc_gapic.yaml b/google/cloud/dataproc/v1/dataproc_gapic.yaml new file mode 100644 index 00000000..b2d36ef3 --- /dev/null +++ b/google/cloud/dataproc/v1/dataproc_gapic.yaml @@ -0,0 +1,431 @@ +type: com.google.api.codegen.ConfigProto +config_schema_version: 1.0.0 +# The settings of generated code in a specific language. 
+language_settings: + java: + package_name: com.google.cloud.dataproc.v1 + python: + package_name: google.cloud.dataproc_v1.gapic + go: + package_name: cloud.google.com/go/dataproc/apiv1 + csharp: + package_name: Google.Cloud.Dataproc.V1 + ruby: + package_name: Google::Cloud::Dataproc::V1 + php: + package_name: Google\Cloud\Dataproc\V1 + nodejs: + package_name: dataproc.v1 + domain_layer_location: google-cloud +# The configuration for the license header to put on generated files. +license_header: + # The file containing the copyright line(s). + copyright_file: copyright-google.txt + # The file containing the raw license header without any copyright line(s). + license_file: license-header-apache-2.0.txt +# A list of API interface configurations. +interfaces: + # The fully qualified name of the API interface. +- name: google.cloud.dataproc.v1.ClusterController + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. + collections: [] + # Definition for smoke test. + smoke_test: + method: ListClusters + init_fields: + - project_id=$PROJECT_ID + - region="global" + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. 
+ retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 10000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 10000 + total_timeout_millis: 300000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. + # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. + # + # required_fields - Fields that are always required for a request to be + # valid. + # + # request_object_method - Turns on or off the generation of a method whose + # sole parameter is a request object. Not all languages will generate this + # method. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. 
The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. The token_field is the + # name of the field in the response containing the next page token. The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. + # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. + # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. 
+ methods: + - name: CreateCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster + required_fields: + - project_id + - region + - cluster + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 90000 + long_running: + return_type: google.cloud.dataproc.v1.Cluster + metadata_type: google.cloud.dataproc.v1.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: UpdateCluster + required_fields: + - project_id + - region + - cluster_name + - cluster + - update_mask + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + long_running: + return_type: google.cloud.dataproc.v1.Cluster + metadata_type: google.cloud.dataproc.v1.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: DeleteCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 15000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.dataproc.v1.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: GetCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + 
timeout_millis: 10000 + - name: ListClusters + flattening: + groups: + - parameters: + - project_id + - region + required_fields: + - project_id + - region + request_object_method: true + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: clusters + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 10000 + - name: DiagnoseCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 10000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.dataproc.v1.DiagnoseClusterResults + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 30000 + # The fully qualified name of the API interface. +- name: google.cloud.dataproc.v1.JobController + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. + collections: [] + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. 
+ retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 30000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 30000 + total_timeout_millis: 600000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. + # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. + # + # required_fields - Fields that are always required for a request to be + # valid. + # + # request_object_method - Turns on or off the generation of a method whose + # sole parameter is a request object. Not all languages will generate this + # method. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. 
The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. The token_field is the + # name of the field in the response containing the next page token. The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. + # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. + # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. 
+ methods: + - name: SubmitJob + flattening: + groups: + - parameters: + - project_id + - region + - job + required_fields: + - project_id + - region + - job + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: GetJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 10000 + - name: ListJobs + flattening: + groups: + - parameters: + - project_id + - region + required_fields: + - project_id + - region + request_object_method: true + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: jobs + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 30000 + - name: UpdateJob + required_fields: + - project_id + - region + - job_id + - job + - update_mask + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: CancelJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: DeleteJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 diff --git a/google/cloud/dataproc/v1/jobs.proto b/google/cloud/dataproc/v1/jobs.proto index 854ce9b9..0eadc084 100644 --- a/google/cloud/dataproc/v1/jobs.proto +++ b/google/cloud/dataproc/v1/jobs.proto @@ -1,4 +1,4 @@ -// Copyright 2016 Google Inc. +// Copyright 2017 Google Inc. 
// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ package google.cloud.dataproc.v1; import "google/api/annotations.proto"; import "google/protobuf/empty.proto"; +import "google/protobuf/field_mask.proto"; import "google/protobuf/timestamp.proto"; option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1;dataproc"; @@ -43,10 +44,15 @@ service JobController { option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" }; } + // Updates a job in a project. + rpc UpdateJob(UpdateJobRequest) returns (Job) { + option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" body: "job" }; + } + // Starts a job cancellation request. To access the job resource // after cancellation, call - // [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or - // [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get). + // [regions/{region}/jobs.list](/dataproc/docs/reference/rest/v1/projects.regions.jobs/list) or + // [regions/{region}/jobs.get](/dataproc/docs/reference/rest/v1/projects.regions.jobs/get). rpc CancelJob(CancelJobRequest) returns (Job) { option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" }; } @@ -103,7 +109,7 @@ message LoggingConfig { // [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) // jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html). message HadoopJob { - // [Required] Indicates the location of the driver's main class. Specify + // Required. Indicates the location of the driver's main class. Specify // either the jar file that contains the main class or the main class name. 
// To specify both, add the jar file to `jar_file_uris`, and then specify // the main class name in this property. @@ -120,40 +126,40 @@ message HadoopJob { string main_class = 2; } - // [Optional] The arguments to pass to the driver. Do not + // Optional. The arguments to pass to the driver. Do not // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job // properties, since a collision may occur that causes an incorrect job // submission. repeated string args = 3; - // [Optional] Jar file URIs to add to the CLASSPATHs of the + // Optional. Jar file URIs to add to the CLASSPATHs of the // Hadoop driver and tasks. repeated string jar_file_uris = 4; - // [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied + // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied // to the working directory of Hadoop drivers and distributed tasks. Useful // for naively parallel tasks. repeated string file_uris = 5; - // [Optional] HCFS URIs of archives to be extracted in the working directory of + // Optional. HCFS URIs of archives to be extracted in the working directory of // Hadoop drivers and tasks. Supported file types: // .jar, .tar, .tar.gz, .tgz, or .zip. repeated string archive_uris = 6; - // [Optional] A mapping of property names to values, used to configure Hadoop. + // Optional. A mapping of property names to values, used to configure Hadoop. // Properties that conflict with values set by the Cloud Dataproc API may be // overwritten. Can include properties set in /etc/hadoop/conf/*-site and // classes in user code. map properties = 7; - // [Optional] The runtime log config for job execution. + // Optional. The runtime log config for job execution. LoggingConfig logging_config = 8; } // A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/) // applications on YARN. message SparkJob { - // [Required] The specification of the main method to call to drive the job. + // Required. 
The specification of the main method to call to drive the job. // Specify either the jar file that contains the main class or the main class // name. To pass both a main jar and a main class in that jar, add the jar to // `CommonJob.jar_file_uris`, and then specify the main class name in `main_class`. @@ -166,31 +172,31 @@ message SparkJob { string main_class = 2; } - // [Optional] The arguments to pass to the driver. Do not include arguments, + // Optional. The arguments to pass to the driver. Do not include arguments, // such as `--conf`, that can be set as job properties, since a collision may // occur that causes an incorrect job submission. repeated string args = 3; - // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the + // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the // Spark driver and tasks. repeated string jar_file_uris = 4; - // [Optional] HCFS URIs of files to be copied to the working directory of + // Optional. HCFS URIs of files to be copied to the working directory of // Spark drivers and distributed tasks. Useful for naively parallel tasks. repeated string file_uris = 5; - // [Optional] HCFS URIs of archives to be extracted in the working directory + // Optional. HCFS URIs of archives to be extracted in the working directory // of Spark drivers and tasks. Supported file types: // .jar, .tar, .tar.gz, .tgz, and .zip. repeated string archive_uris = 6; - // [Optional] A mapping of property names to values, used to configure Spark. + // Optional. A mapping of property names to values, used to configure Spark. // Properties that conflict with values set by the Cloud Dataproc API may be // overwritten. Can include properties set in // /etc/spark/conf/spark-defaults.conf and classes in user code. map properties = 7; - // [Optional] The runtime log config for job execution. + // Optional. The runtime log config for job execution. 
LoggingConfig logging_config = 8; } @@ -198,44 +204,44 @@ message SparkJob { // [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) // applications on YARN. message PySparkJob { - // [Required] The HCFS URI of the main Python file to use as the driver. Must + // Required. The HCFS URI of the main Python file to use as the driver. Must // be a .py file. string main_python_file_uri = 1; - // [Optional] The arguments to pass to the driver. Do not include arguments, + // Optional. The arguments to pass to the driver. Do not include arguments, // such as `--conf`, that can be set as job properties, since a collision may // occur that causes an incorrect job submission. repeated string args = 2; - // [Optional] HCFS file URIs of Python files to pass to the PySpark + // Optional. HCFS file URIs of Python files to pass to the PySpark // framework. Supported file types: .py, .egg, and .zip. repeated string python_file_uris = 3; - // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the + // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the // Python driver and tasks. repeated string jar_file_uris = 4; - // [Optional] HCFS URIs of files to be copied to the working directory of + // Optional. HCFS URIs of files to be copied to the working directory of // Python drivers and distributed tasks. Useful for naively parallel tasks. repeated string file_uris = 5; - // [Optional] HCFS URIs of archives to be extracted in the working directory of + // Optional. HCFS URIs of archives to be extracted in the working directory of // .jar, .tar, .tar.gz, .tgz, and .zip. repeated string archive_uris = 6; - // [Optional] A mapping of property names to values, used to configure PySpark. + // Optional. A mapping of property names to values, used to configure PySpark. // Properties that conflict with values set by the Cloud Dataproc API may be // overwritten. 
Can include properties set in // /etc/spark/conf/spark-defaults.conf and classes in user code. map properties = 7; - // [Optional] The runtime log config for job execution. + // Optional. The runtime log config for job execution. LoggingConfig logging_config = 8; } // A list of queries to run on a cluster. message QueryList { - // [Required] The queries to execute. You do not need to terminate a query + // Required. The queries to execute. You do not need to terminate a query // with a semicolon. Multiple queries can be specified in one string // by separating each with a semicolon. Here is an example of an Cloud // Dataproc API snippet that uses a QueryList to specify a HiveJob: @@ -255,7 +261,7 @@ message QueryList { // A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/) // queries on YARN. message HiveJob { - // [Required] The sequence of Hive queries to execute, specified as either + // Required. The sequence of Hive queries to execute, specified as either // an HCFS file URI or a list of queries. oneof queries { // The HCFS URI of the script that contains Hive queries. @@ -265,22 +271,22 @@ message HiveJob { QueryList query_list = 2; } - // [Optional] Whether to continue executing queries if a query fails. + // Optional. Whether to continue executing queries if a query fails. // The default value is `false`. Setting to `true` can be useful when executing // independent parallel queries. bool continue_on_failure = 3; - // [Optional] Mapping of query variable names to values (equivalent to the + // Optional. Mapping of query variable names to values (equivalent to the // Hive command: `SET name="value";`). map script_variables = 4; - // [Optional] A mapping of property names and values, used to configure Hive. + // Optional. A mapping of property names and values, used to configure Hive. // Properties that conflict with values set by the Cloud Dataproc API may be // overwritten. 
Can include properties set in /etc/hadoop/conf/*-site.xml, // /etc/hive/conf/hive-site.xml, and classes in user code. map properties = 5; - // [Optional] HCFS URIs of jar files to add to the CLASSPATH of the + // Optional. HCFS URIs of jar files to add to the CLASSPATH of the // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes // and UDFs. repeated string jar_file_uris = 6; @@ -289,7 +295,7 @@ message HiveJob { // A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/) // queries. message SparkSqlJob { - // [Required] The sequence of Spark SQL queries to execute, specified as + // Required. The sequence of Spark SQL queries to execute, specified as // either an HCFS file URI or as a list of queries. oneof queries { // The HCFS URI of the script that contains SQL queries. @@ -299,26 +305,26 @@ message SparkSqlJob { QueryList query_list = 2; } - // [Optional] Mapping of query variable names to values (equivalent to the + // Optional. Mapping of query variable names to values (equivalent to the // Spark SQL command: SET `name="value";`). map script_variables = 3; - // [Optional] A mapping of property names to values, used to configure + // Optional. A mapping of property names to values, used to configure // Spark SQL's SparkConf. Properties that conflict with values set by the // Cloud Dataproc API may be overwritten. map properties = 4; - // [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH. + // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH. repeated string jar_file_uris = 56; - // [Optional] The runtime log config for job execution. + // Optional. The runtime log config for job execution. LoggingConfig logging_config = 6; } // A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/) // queries on YARN. message PigJob { - // [Required] The sequence of Pig queries to execute, specified as an HCFS + // Required. 
The sequence of Pig queries to execute, specified as an HCFS // file URI or a list of queries. oneof queries { // The HCFS URI of the script that contains the Pig queries. @@ -328,35 +334,35 @@ message PigJob { QueryList query_list = 2; } - // [Optional] Whether to continue executing queries if a query fails. + // Optional. Whether to continue executing queries if a query fails. // The default value is `false`. Setting to `true` can be useful when executing // independent parallel queries. bool continue_on_failure = 3; - // [Optional] Mapping of query variable names to values (equivalent to the Pig + // Optional. Mapping of query variable names to values (equivalent to the Pig // command: `name=[value]`). map script_variables = 4; - // [Optional] A mapping of property names to values, used to configure Pig. + // Optional. A mapping of property names to values, used to configure Pig. // Properties that conflict with values set by the Cloud Dataproc API may be // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, // /etc/pig/conf/pig.properties, and classes in user code. map properties = 5; - // [Optional] HCFS URIs of jar files to add to the CLASSPATH of + // Optional. HCFS URIs of jar files to add to the CLASSPATH of // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. repeated string jar_file_uris = 6; - // [Optional] The runtime log config for job execution. + // Optional. The runtime log config for job execution. LoggingConfig logging_config = 7; } // Cloud Dataproc job config. message JobPlacement { - // [Required] The name of the cluster where the job will be submitted. + // Required. The name of the cluster where the job will be submitted. string cluster_name = 1; - // [Output-only] A cluster UUID generated by the Cloud Dataproc service when + // Output-only. A cluster UUID generated by the Cloud Dataproc service when // the job is submitted. 
string cluster_uuid = 2; } @@ -393,46 +399,132 @@ message JobStatus { // The job has completed, but encountered an error. ERROR = 6; + + // Job attempt has failed. The detail field contains failure details for + // this attempt. + // + // Applies to restartable jobs only. + ATTEMPT_FAILURE = 9; } - // [Output-only] A state message specifying the overall job state. + enum Substate { + UNSPECIFIED = 0; + + // The Job is submitted to the agent. + // + // Applies to RUNNING state. + SUBMITTED = 1; + + // The Job has been received and is awaiting execution (it may be waiting + // for a condition to be met). See the "details" field for the reason for + // the delay. + // + // Applies to RUNNING state. + QUEUED = 2; + + // The agent-reported status is out of date, which may be caused by a + // loss of communication between the agent and Cloud Dataproc. If the + // agent does not send a timely update, the job will fail. + // + // Applies to RUNNING state. + STALE_STATUS = 3; + } + + // Output-only. A state message specifying the overall job state. State state = 1; - // [Output-only] Optional job state details, such as an error + // Output-only. Optional job state details, such as an error // description if the state is ERROR. string details = 2; - // [Output-only] The time when this state was entered. + // Output-only. The time when this state was entered. google.protobuf.Timestamp state_start_time = 6; + + // Output-only. Additional state information, which includes + // status reported by the agent. + Substate substate = 7; } // Encapsulates the full scoping used to reference a job. message JobReference { - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Optional] The job ID, which must be unique within the project. The job ID + // Optional. The job ID, which must be unique within the project. 
The job ID // is generated by the server upon job submission or provided by the user as a // means to perform retries without creating duplicate jobs. The ID must // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or - // hyphens (-). The maximum length is 512 characters. + // hyphens (-). The maximum length is 100 characters. string job_id = 2; } +// A YARN application created by a job. Application information is a subset of +// org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto. +// +// **Beta Feature**: This report is available for testing purposes only. It may +// be changed before final release. +message YarnApplication { + // The application state, corresponding to + // YarnProtos.YarnApplicationStateProto. + enum State { + // Status is unspecified. + STATE_UNSPECIFIED = 0; + + // Status is NEW. + NEW = 1; + + // Status is NEW_SAVING. + NEW_SAVING = 2; + + // Status is SUBMITTED. + SUBMITTED = 3; + + // Status is ACCEPTED. + ACCEPTED = 4; + + // Status is RUNNING. + RUNNING = 5; + + // Status is FINISHED. + FINISHED = 6; + + // Status is FAILED. + FAILED = 7; + + // Status is KILLED. + KILLED = 8; + } + + // Required. The application name. + string name = 1; + + // Required. The application state. + State state = 2; + + // Required. The numerical progress of the application, from 1 to 100. + float progress = 3; + + // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or + // TimelineServer that provides application-specific information. The URL uses + // the internal hostname, and requires a proxy server for resolution and, + // possibly, access. + string tracking_url = 4; +} + // A Cloud Dataproc job resource. message Job { - // [Optional] The fully qualified reference to the job, which can be used to + // Optional. The fully qualified reference to the job, which can be used to // obtain the equivalent REST path of the job resource. 
If this property // is not specified when a job is created, the server generates a // job_id. JobReference reference = 1; - // [Required] Job information, including how, when, and where to + // Required. Job information, including how, when, and where to // run the job. JobPlacement placement = 2; - // [Required] The application/framework-specific portion of the job. + // Required. The application/framework-specific portion of the job. oneof type_job { // Job is a Hadoop job. HadoopJob hadoop_job = 3; @@ -453,47 +545,80 @@ message Job { SparkSqlJob spark_sql_job = 12; } - // [Output-only] The job status. Additional application-specific + // Output-only. The job status. Additional application-specific // status information may be contained in the type_job // and yarn_applications fields. JobStatus status = 8; - // [Output-only] The previous job status. + // Output-only. The previous job status. repeated JobStatus status_history = 13; - // [Output-only] A URI pointing to the location of the stdout of the job's + // Output-only. The collection of YARN applications spun up by this job. + // + // **Beta** Feature: This report is available for testing purposes only. It may + // be changed before final release. + repeated YarnApplication yarn_applications = 9; + + // Output-only. A URI pointing to the location of the stdout of the job's // driver program. string driver_output_resource_uri = 17; - // [Output-only] If present, the location of miscellaneous control files + // Output-only. If present, the location of miscellaneous control files // which may be used as part of job setup and handling. If not present, // control files may be placed in the same location as `driver_output_uri`. string driver_control_files_uri = 15; + + // Optional. The labels to associate with this job. + // Label **keys** must contain 1 to 63 characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). 
+ // Label **values** may be empty, but, if present, must contain 1 to 63 + // characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // No more than 32 labels can be associated with a job. + map labels = 18; + + // Optional. Job scheduling configuration. + JobScheduling scheduling = 20; +} + +// Job scheduling options. +// +// **Beta Feature**: These options are available for testing purposes only. +// They may be changed before final release. +message JobScheduling { + // Optional. Maximum number of times per hour a driver may be restarted as + // a result of driver terminating with non-zero code before job is + // reported failed. + // + // A job may be reported as thrashing if driver exits with non-zero code + // 4 times within 10 minute window. + // + // Maximum value is 10. + int32 max_failures_per_hour = 1; } // A request to submit a job. message SubmitJobRequest { - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The job resource. + // Required. The job resource. Job job = 2; } // A request to get the resource representation for a job in a project. message GetJobRequest { - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The job ID. + // Required. The job ID. 
string job_id = 2; } @@ -512,35 +637,77 @@ message ListJobsRequest { NON_ACTIVE = 2; } - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 6; - // [Optional] The number of results to return in each response. + // Optional. The number of results to return in each response. int32 page_size = 2; - // [Optional] The page token, returned by a previous call, to request the + // Optional. The page token, returned by a previous call, to request the // next page of results. string page_token = 3; - // [Optional] If set, the returned jobs list includes only jobs that were + // Optional. If set, the returned jobs list includes only jobs that were // submitted to the named cluster. string cluster_name = 4; - // [Optional] Specifies enumerated categories of jobs to list + // Optional. Specifies enumerated categories of jobs to list. // (default = match ALL jobs). + // + // If `filter` is provided, `jobStateMatcher` will be ignored. JobStateMatcher job_state_matcher = 5; + + // Optional. A filter constraining the jobs to list. Filters are + // case-sensitive and have the following syntax: + // + // [field = value] AND [field [= value]] ... + // + // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label + // key. **value** can be `*` to match all values. + // `status.state` can be either `ACTIVE` or `NON_ACTIVE`. + // Only the logical `AND` operator is supported; space-separated items are + // treated as having an implicit `AND` operator. + // + // Example filter: + // + // status.state = ACTIVE AND labels.env = staging AND labels.starred = * + string filter = 7; +} + +// A request to update a job. +message UpdateJobRequest { + // Required. 
The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 2; + + // Required. The job ID. + string job_id = 3; + + // Required. The changes to the job. + Job job = 4; + + // Required. Specifies the path, relative to Job, of + // the field to update. For example, to update the labels of a Job the + // update_mask parameter would be specified as + // labels, and the `PATCH` request body would specify the new + // value. Note: Currently, labels is the only + // field that can be updated. + google.protobuf.FieldMask update_mask = 5; } // A list of jobs in a project. message ListJobsResponse { - // [Output-only] Jobs list. + // Output-only. Jobs list. repeated Job jobs = 1; - // [Optional] This token is included in the response if there are more results + // Optional. This token is included in the response if there are more results // to fetch. To fetch additional results, provide this value as the // `page_token` in a subsequent ListJobsRequest. string next_page_token = 2; @@ -548,26 +715,26 @@ message ListJobsResponse { // A request to cancel a job. message CancelJobRequest { - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The job ID. + // Required. The job ID. string job_id = 2; } // A request to delete a job. message DeleteJobRequest { - // [Required] The ID of the Google Cloud Platform project that the job + // Required. The ID of the Google Cloud Platform project that the job // belongs to. string project_id = 1; - // [Required] The Cloud Dataproc region in which to handle the request. + // Required. 
The Cloud Dataproc region in which to handle the request. string region = 3; - // [Required] The job ID. + // Required. The job ID. string job_id = 2; } diff --git a/google/cloud/dataproc/v1/operations.proto b/google/cloud/dataproc/v1/operations.proto index 61227ed2..aeca8c8b 100644 --- a/google/cloud/dataproc/v1/operations.proto +++ b/google/cloud/dataproc/v1/operations.proto @@ -1,4 +1,4 @@ -// Copyright 2016 Google Inc. +// Copyright 2017 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -44,36 +44,42 @@ message ClusterOperationStatus { DONE = 3; } - // [Output-only] A message containing the operation state. + // Output-only. A message containing the operation state. State state = 1; - // [Output-only] A message containing the detailed operation state. + // Output-only. A message containing the detailed operation state. string inner_state = 2; - // [Output-only]A message containing any operation metadata details. + // Output-only. A message containing any operation metadata details. string details = 3; - // [Output-only] The time this state was entered. + // Output-only. The time this state was entered. google.protobuf.Timestamp state_start_time = 4; } // Metadata describing the operation. message ClusterOperationMetadata { - // [Output-only] Name of the cluster for the operation. + // Output-only. Name of the cluster for the operation. string cluster_name = 7; - // [Output-only] Cluster UUID for the operation. + // Output-only. Cluster UUID for the operation. string cluster_uuid = 8; - // [Output-only] Current operation status. + // Output-only. Current operation status. ClusterOperationStatus status = 9; - // [Output-only] The previous operation status. + // Output-only. The previous operation status. repeated ClusterOperationStatus status_history = 10; - // [Output-only] The operation type. + // Output-only. The operation type.
string operation_type = 11; - // [Output-only] Short description of operation. + // Output-only. Short description of operation. string description = 12; + + // Output-only. Labels associated with the operation + map labels = 13; + + // Output-only. Errors encountered during operation execution. + repeated string warnings = 14; } diff --git a/google/cloud/dataproc/v1beta2/clusters.proto b/google/cloud/dataproc/v1beta2/clusters.proto new file mode 100644 index 00000000..abab7cbc --- /dev/null +++ b/google/cloud/dataproc/v1beta2/clusters.proto @@ -0,0 +1,611 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1beta2; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/duration.proto"; +import "google/protobuf/field_mask.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "ClustersProto"; +option java_package = "com.google.cloud.dataproc.v1beta2"; + + +// The ClusterControllerService provides methods to manage clusters +// of Google Compute Engine instances. +service ClusterController { + // Creates a cluster in a project. 
+ rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1beta2/projects/{project_id}/regions/{region}/clusters" body: "cluster" }; + } + + // Updates a cluster in a project. + rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { patch: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" }; + } + + // Deletes a cluster in a project. + rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { delete: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; + } + + // Gets the resource representation for a cluster in a project. + rpc GetCluster(GetClusterRequest) returns (Cluster) { + option (google.api.http) = { get: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}" }; + } + + // Lists all regions/{region}/clusters in a project. + rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) { + option (google.api.http) = { get: "/v1beta2/projects/{project_id}/regions/{region}/clusters" }; + } + + // Gets cluster diagnostic information. + // After the operation completes, the Operation.response field + // contains `DiagnoseClusterOutputLocation`. + rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1beta2/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" }; + } +} + +// Describes the identifying information, config, and status of +// a cluster of Google Compute Engine instances. +message Cluster { + // Required. The Google Cloud Platform project ID that the cluster belongs to. + string project_id = 1; + + // Required. The cluster name. Cluster names within a project must be + // unique. Names of deleted clusters can be reused. + string cluster_name = 2; + + // Required. The cluster config. 
Note that Cloud Dataproc may set + // default values, and values may change when clusters are updated. + ClusterConfig config = 3; + + // Optional. The labels to associate with this cluster. + // Label **keys** must contain 1 to 63 characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // Label **values** may be empty, but, if present, must contain 1 to 63 + // characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // No more than 32 labels can be associated with a cluster. + map labels = 8; + + // Output-only. Cluster status. + ClusterStatus status = 4; + + // Output-only. The previous cluster status. + repeated ClusterStatus status_history = 7; + + // Output-only. A cluster UUID (Universally Unique Identifier). Cloud Dataproc + // generates this value when it creates the cluster. + string cluster_uuid = 6; + + // Contains cluster daemon metrics such as HDFS and YARN stats. + // + // **Beta Feature**: This report is available for testing purposes only. It may + // be changed before final release. + ClusterMetrics metrics = 9; +} + +// The cluster config. +message ClusterConfig { + // Optional. A Google Cloud Storage staging bucket used for sharing generated + // SSH keys and config. If you do not specify a staging bucket, Cloud + // Dataproc will determine an appropriate Cloud Storage location (US, + // ASIA, or EU) for your cluster's staging bucket according to the Google + // Compute Engine zone where your cluster is deployed, and then it will create + // and manage this project-level, per-location bucket for you. + string config_bucket = 1; + + // Required. The shared Google Compute Engine config settings for + // all instances in a cluster. + GceClusterConfig gce_cluster_config = 8; + + // Optional. The Google Compute Engine config settings for + // the master instance in a cluster. + InstanceGroupConfig master_config = 9; + + // Optional.
The Google Compute Engine config settings for + // worker instances in a cluster. + InstanceGroupConfig worker_config = 10; + + // Optional. The Google Compute Engine config settings for + // additional worker instances in a cluster. + InstanceGroupConfig secondary_worker_config = 12; + + // Optional. The config settings for software inside the cluster. + SoftwareConfig software_config = 13; + + // Optional. The config setting for auto delete cluster schedule. + LifecycleConfig lifecycle_config = 14; + + // Optional. Commands to execute on each node after config is + // completed. By default, executables are run on master and all worker nodes. + // You can test a node's role metadata to run an executable on + // a master or worker node, as shown below using `curl` (you can also use `wget`): + // + // ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) + // if [[ "${ROLE}" == 'Master' ]]; then + // ... master specific actions ... + // else + // ... worker specific actions ... + // fi + repeated NodeInitializationAction initialization_actions = 11; +} + +// Common config settings for resources of Google Compute Engine cluster +// instances, applicable to all instances in the cluster. +message GceClusterConfig { + // Optional. The zone where the Google Compute Engine cluster will be located. + // On a create request, it is required in the "global" region. If omitted + // in a non-global Cloud Dataproc region, the service will pick a zone in the + // corresponding Compute Engine region. On a get request, zone will always be + // present. + // + // A full URL, partial URI, or short name are valid. Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]` + // * `projects/[project_id]/zones/[zone]` + // * `us-central1-f` + string zone_uri = 1; + + // Optional. The Google Compute Engine network to be used for machine + // communications.
Cannot be specified with subnetwork_uri. If neither + // `network_uri` nor `subnetwork_uri` is specified, the "default" network of + // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see + // [Using Subnetworks](/compute/docs/subnetworks) for more information). + // + // A full URL, partial URI, or short name are valid. Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default` + // * `projects/[project_id]/regions/global/default` + // * `default` + string network_uri = 2; + + // Optional. The Google Compute Engine subnetwork to be used for machine + // communications. Cannot be specified with network_uri. + // + // A full URL, partial URI, or short name are valid. Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0` + // * `projects/[project_id]/regions/us-east1/sub0` + // * `sub0` + string subnetwork_uri = 6; + + // Optional. If true, all instances in the cluster will only have internal IP + // addresses. By default, clusters are not restricted to internal IP addresses, + // and will have ephemeral external IP addresses assigned to each instance. + // This `internal_ip_only` restriction can only be enabled for subnetwork + // enabled networks, and all off-cluster dependencies must be configured to be + // accessible without external IP addresses. + bool internal_ip_only = 7; + + // Optional. The service account of the instances. Defaults to the default + // Google Compute Engine service account. Custom service accounts need + // permissions equivalent to the following IAM roles: + // + // * roles/logging.logWriter + // * roles/storage.objectAdmin + // + // (see https://cloud.google.com/compute/docs/access/service-accounts#custom_service_accounts + // for more information). + // Example: `[account_id]@[project_id].iam.gserviceaccount.com` + string service_account = 8; + + // Optional.
The URIs of service account scopes to be included in Google + // Compute Engine instances. The following base set of scopes is always + // included: + // + // * https://www.googleapis.com/auth/cloud.useraccounts.readonly + // * https://www.googleapis.com/auth/devstorage.read_write + // * https://www.googleapis.com/auth/logging.write + // + // If no scopes are specified, the following defaults are also provided: + // + // * https://www.googleapis.com/auth/bigquery + // * https://www.googleapis.com/auth/bigtable.admin.table + // * https://www.googleapis.com/auth/bigtable.data + // * https://www.googleapis.com/auth/devstorage.full_control + repeated string service_account_scopes = 3; + + // The Google Compute Engine tags to add to all instances (see + // [Tagging instances](/compute/docs/label-or-tag-resources#tags)). + repeated string tags = 4; + + // The Google Compute Engine metadata entries to add to all instances (see + // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). + map<string, string> metadata = 5; +} + +// Optional. The config settings for Google Compute Engine resources in +// an instance group, such as a master or worker group. +message InstanceGroupConfig { + // Optional. The number of VM instances in the instance group. + // For master instance groups, must be set to 1. + int32 num_instances = 1; + + // Optional. The list of instance names. Cloud Dataproc derives the names from + // `cluster_name`, `num_instances`, and the instance group if not set by user + // (recommended practice is to let Cloud Dataproc derive the name). + repeated string instance_names = 2; + + // Output-only. The Google Compute Engine image resource used for cluster + // instances. Inferred from `SoftwareConfig.image_version`. + string image_uri = 3; + + // Optional. The Google Compute Engine machine type used for cluster instances. + // + // A full URL, partial URI, or short name are valid.
Examples: + // + // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2` + // * `projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2` + // * `n1-standard-2` + string machine_type_uri = 4; + + // Optional. Disk option config settings. + DiskConfig disk_config = 5; + + // Optional. Specifies that this instance group contains preemptible instances. + bool is_preemptible = 6; + + // Output-only. The config for Google Compute Engine Instance Group + // Manager that manages this group. + // This is only used for preemptible instance groups. + ManagedGroupConfig managed_group_config = 7; + + // Optional. The Google Compute Engine accelerator configuration for these + // instances. + // + // **Beta Feature**: This feature is still under development. It may be + // changed before final release. + repeated AcceleratorConfig accelerators = 8; +} + +// Specifies the resources used to actively manage an instance group. +message ManagedGroupConfig { + // Output-only. The name of the Instance Template used for the Managed + // Instance Group. + string instance_template_name = 1; + + // Output-only. The name of the Instance Group Manager for this group. + string instance_group_manager_name = 2; +} + +// Specifies the type and number of accelerator cards attached to the instances +// of an instance group (see [GPUs on Compute Engine](/compute/docs/gpus/)). +message AcceleratorConfig { + // Full URL, partial URI, or short name of the accelerator type resource to + // expose to this instance. 
See [Google Compute Engine AcceleratorTypes]( + // /compute/docs/reference/beta/acceleratorTypes) + // + // Examples + // * `https://www.googleapis.com/compute/beta/projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80` + // * `projects/[project_id]/zones/us-east1-a/acceleratorTypes/nvidia-tesla-k80` + // * `nvidia-tesla-k80` + string accelerator_type_uri = 1; + + // The number of the accelerator cards of this type exposed to this instance. + int32 accelerator_count = 2; +} + +// Specifies the config of disk options for a group of VM instances. +message DiskConfig { + // Optional. Size in GB of the boot disk (default is 500GB). + int32 boot_disk_size_gb = 1; + + // Optional. Number of attached SSDs, from 0 to 4 (default is 0). + // If SSDs are not attached, the boot disk is used to store runtime logs and + // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. + // If one or more SSDs are attached, this runtime bulk + // data is spread across them, and the boot disk contains only basic + // config and installed binaries. + int32 num_local_ssds = 2; +} + +// Specifies the cluster auto delete related schedule configuration. +message LifecycleConfig { + // Optional. The longest duration that cluster would keep alive while staying + // idle; passing this threshold will cause cluster to be auto-deleted. + google.protobuf.Duration idle_delete_ttl = 1; + + oneof ttl { + // Optional. The time when cluster will be auto-deleted. + google.protobuf.Timestamp auto_delete_time = 2; + + // Optional. The life duration of cluster, the cluster will be auto-deleted + // at the end of this duration. + google.protobuf.Duration auto_delete_ttl = 3; + } +} + +// Specifies an executable to run on a fully configured node and a +// timeout period for executable completion. +message NodeInitializationAction { + // Required. Google Cloud Storage URI of executable file. + string executable_file = 1; + + // Optional. 
Amount of time executable has to complete. Default is + // 10 minutes. Cluster creation fails with an explanatory error message (the + // name of the executable that caused the error and the exceeded timeout + // period) if the executable is not completed at end of the timeout period. + google.protobuf.Duration execution_timeout = 2; +} + +// The status of a cluster and its instances. +message ClusterStatus { + // The cluster state. + enum State { + // The cluster state is unknown. + UNKNOWN = 0; + + // The cluster is being created and set up. It is not ready for use. + CREATING = 1; + + // The cluster is currently running and healthy. It is ready for use. + RUNNING = 2; + + // The cluster encountered an error. It is not ready for use. + ERROR = 3; + + // The cluster is being deleted. It cannot be used. + DELETING = 4; + + // The cluster is being updated. It continues to accept and process jobs. + UPDATING = 5; + } + + enum Substate { + UNSPECIFIED = 0; + + // The cluster is known to be in an unhealthy state + // (for example, critical daemons are not running or HDFS capacity is + // exhausted). + // + // Applies to RUNNING state. + UNHEALTHY = 1; + + // The agent-reported status is out of date (may occur if + // Cloud Dataproc loses communication with Agent). + // + // Applies to RUNNING state. + STALE_STATUS = 2; + } + + // Output-only. The cluster's state. + State state = 1; + + // Output-only. Optional details of cluster's state. + string detail = 2; + + // Output-only. Time when this state was entered. + google.protobuf.Timestamp state_start_time = 3; + + // Output-only. Additional state information that includes + // status reported by the agent. + Substate substate = 4; +} + +// Specifies the selection and config of software inside the cluster. +message SoftwareConfig { + // Optional. The version of software inside the cluster. It must match the + // regular expression `[0-9]+\.[0-9]+`. 
If unspecified, it defaults to the + // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)). + string image_version = 1; + + // Optional. The properties to set on daemon config files. + // + // Property keys are specified in `prefix:property` format, such as + // `core:fs.defaultFS`. The following are supported prefixes + // and their mappings: + // + // * capacity-scheduler: `capacity-scheduler.xml` + // * core: `core-site.xml` + // * distcp: `distcp-default.xml` + // * hdfs: `hdfs-site.xml` + // * hive: `hive-site.xml` + // * mapred: `mapred-site.xml` + // * pig: `pig.properties` + // * spark: `spark-defaults.conf` + // * yarn: `yarn-site.xml` + // + // For more information, see + // [Cluster properties](/dataproc/docs/concepts/cluster-properties). + map<string, string> properties = 2; +} + +// Contains cluster daemon metrics, such as HDFS and YARN stats. +// +// **Beta Feature**: This report is available for testing purposes only. It may +// be changed before final release. +message ClusterMetrics { + // The HDFS metrics. + map<string, int64> hdfs_metrics = 1; + + // The YARN metrics. + map<string, int64> yarn_metrics = 2; +} + +// A request to create a cluster. +message CreateClusterRequest { + // Required. The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The cluster to create. + Cluster cluster = 2; +} + +// A request to update a cluster. +message UpdateClusterRequest { + // Required. The ID of the Google Cloud Platform project the + // cluster belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 5; + + // Required. The cluster name. + string cluster_name = 2; + + // Required. The changes to the cluster. + Cluster cluster = 3; + + // Optional. Timeout for graceful YARN decommissioning.
Graceful + // decommissioning allows removing nodes from the cluster without + // interrupting jobs in progress. Timeout specifies how long to wait for jobs + // in progress to finish before forcefully removing nodes (and potentially + // interrupting jobs). Default timeout is 0 (for forceful decommission), and + // the maximum allowed timeout is 1 day. + // + // Only supported on Dataproc image versions 1.2 and higher. + google.protobuf.Duration graceful_decommission_timeout = 6; + + // Required. Specifies the path, relative to Cluster, of + // the field to update. For example, to change the number of workers + // in a cluster to 5, the update_mask parameter would be + // specified as config.worker_config.num_instances, + // and the `PATCH` request body would specify the new value, as follows: + // + // { + // "config":{ + // "workerConfig":{ + // "numInstances":"5" + // } + // } + // } + // Similarly, to change the number of preemptible workers in a cluster to 5, the + // update_mask parameter would be config.secondary_worker_config.num_instances, + // and the `PATCH` request body would be set as follows: + // + // { + // "config":{ + // "secondaryWorkerConfig":{ + // "numInstances":"5" + // } + // } + // } + // Note: currently only some fields can be updated: + // |Mask|Purpose| + // |`labels`|Updates labels| + // |`config.worker_config.num_instances`|Resize primary worker group| + // |`config.secondary_worker_config.num_instances`|Resize secondary worker group| + google.protobuf.FieldMask update_mask = 4; +} + +// A request to delete a cluster. +message DeleteClusterRequest { + // Required. The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The cluster name. + string cluster_name = 2; + + // Optional. 
Specifying the `cluster_uuid` means the RPC should fail + // (with error NOT_FOUND) if cluster with specified UUID does not exist. + string cluster_uuid = 4; +} + +// Request to get the resource representation for a cluster in a project. +message GetClusterRequest { + // Required. The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The cluster name. + string cluster_name = 2; +} + +// A request to list the clusters in a project. +message ListClustersRequest { + // Required. The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 4; + + // Optional. A filter constraining the clusters to list. Filters are + // case-sensitive and have the following syntax: + // + // field = value [AND [field = value]] ... + // + // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`, + // and `[KEY]` is a label key. **value** can be `*` to match all values. + // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`, + // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE` + // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE` + // contains the `DELETING` and `ERROR` states. + // `clusterName` is the name of the cluster provided at creation time. + // Only the logical `AND` operator is supported; space-separated items are + // treated as having an implicit `AND` operator. + // + // Example filter: + // + // status.state = ACTIVE AND clusterName = mycluster + // AND labels.env = staging AND labels.starred = * + string filter = 5; + + // Optional. The standard List page size. + int32 page_size = 2; + + // Optional. The standard List page token. + string page_token = 3; +} + +// The list of all clusters in a project. 
+message ListClustersResponse { + // Output-only. The clusters in the project. + repeated Cluster clusters = 1; + + // Output-only. This token is included in the response if there are more + // results to fetch. To fetch additional results, provide this value as the + // `page_token` in a subsequent ListClustersRequest. + string next_page_token = 2; +} + +// A request to collect cluster diagnostic information. +message DiagnoseClusterRequest { + // Required. The ID of the Google Cloud Platform project that the cluster + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The cluster name. + string cluster_name = 2; +} + +// The location of diagnostic output. +message DiagnoseClusterResults { + // Output-only. The Google Cloud Storage URI of the diagnostic output. + // The output report is a plain text file with a summary of collected + // diagnostics. + string output_uri = 1; +} diff --git a/google/cloud/dataproc/v1beta2/dataproc.yaml b/google/cloud/dataproc/v1beta2/dataproc.yaml new file mode 100644 index 00000000..67a40809 --- /dev/null +++ b/google/cloud/dataproc/v1beta2/dataproc.yaml @@ -0,0 +1,51 @@ +type: google.api.Service +config_version: 2 +name: dataproc.googleapis.com +title: Google Cloud Dataproc API + +apis: +- name: google.cloud.dataproc.v1beta2.ClusterController +- name: google.cloud.dataproc.v1beta2.JobController +- name: google.cloud.dataproc.v1beta2.WorkflowTemplateService + +types: +- name: google.cloud.dataproc.v1beta2.DiagnoseClusterResults +- name: google.cloud.dataproc.v1beta2.ClusterOperationMetadata +- name: google.cloud.dataproc.v1beta2.WorkflowMetadata + +documentation: + summary: 'Manages Hadoop-based clusters and jobs on Google Cloud Platform.' 
+ +http: + rules: + - selector: google.longrunning.Operations.ListOperations + get: '/v1beta2/{name=projects/*/regions/*/operations}' + + - selector: google.longrunning.Operations.GetOperation + get: '/v1beta2/{name=projects/*/regions/*/operations/*}' + + - selector: google.longrunning.Operations.DeleteOperation + delete: '/v1beta2/{name=projects/*/regions/*/operations/*}' + + - selector: google.longrunning.Operations.CancelOperation + post: '/v1beta2/{name=projects/*/regions/*/operations/*}:cancel' + + - selector: google.iam.v1.IAMPolicy.SetIamPolicy + post: '/v1beta2/{resource=projects/*/regions/*/clusters/*}:setIamPolicy' + body: '*' + + - selector: google.iam.v1.IAMPolicy.GetIamPolicy + get: '/v1beta2/{resource=projects/*/regions/*/clusters/*}:getIamPolicy' + + - selector: google.iam.v1.IAMPolicy.TestIamPermissions + post: '/v1beta2/{resource=projects/*/regions/*/clusters/*}:testIamPermissions' + body: '*' + + +authentication: + rules: + - selector: '*' + oauth: + canonical_scopes: |- + https://www.googleapis.com/auth/cloud-platform + diff --git a/google/cloud/dataproc/v1beta2/dataproc_gapic.yaml b/google/cloud/dataproc/v1beta2/dataproc_gapic.yaml new file mode 100644 index 00000000..2aecbd8c --- /dev/null +++ b/google/cloud/dataproc/v1beta2/dataproc_gapic.yaml @@ -0,0 +1,644 @@ +type: com.google.api.codegen.ConfigProto +config_schema_version: 1.0.0 +# The settings of generated code in a specific language. +language_settings: + java: + package_name: com.google.cloud.dataproc.v1beta2 + python: + package_name: google.cloud.dataproc_v1beta2.gapic + go: + package_name: cloud.google.com/go/dataproc/apiv1beta2 + csharp: + package_name: Google.Cloud.Dataproc.V1Beta2 + ruby: + package_name: Google::Cloud::Dataproc::V1beta2 + php: + package_name: Google\Cloud\Dataproc\V1beta2 + nodejs: + package_name: dataproc.v1beta2 + domain_layer_location: google-cloud +# The configuration for the license header to put on generated files. 
+license_header: + # The file containing the copyright line(s). + copyright_file: copyright-google.txt + # The file containing the raw license header without any copyright line(s). + license_file: license-header-apache-2.0.txt +# A list of API interface configurations. +interfaces: + # The fully qualified name of the API interface. +- name: google.cloud.dataproc.v1beta2.ClusterController + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. + collections: [] + # Definition for smoke test. + smoke_test: + method: ListClusters + init_fields: + - project_id=$PROJECT_ID + - region="global" + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. + retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 10000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 10000 + total_timeout_millis: 300000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. 
+ # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. + # + # required_fields - Fields that are always required for a request to be + # valid. + # + # request_object_method - Turns on or off the generation of a method whose + # sole parameter is a request object. Not all languages will generate this + # method. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. 
The token_field is the + # name of the field in the response containing the next page token. The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. + # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. + # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. + methods: + - name: CreateCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster + required_fields: + - project_id + - region + - cluster + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + long_running: + return_type: google.cloud.dataproc.v1beta2.Cluster + metadata_type: google.cloud.dataproc.v1beta2.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: UpdateCluster + required_fields: + - project_id + - region + - cluster_name + - cluster + - update_mask + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + long_running: + return_type: google.cloud.dataproc.v1beta2.Cluster + metadata_type: google.cloud.dataproc.v1beta2.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: 
DeleteCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.dataproc.v1beta2.ClusterOperationMetadata + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 300000 + - name: GetCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 + - name: ListClusters + flattening: + groups: + - parameters: + - project_id + - region + required_fields: + - project_id + - region + request_object_method: true + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: clusters + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 + - name: DiagnoseCluster + flattening: + groups: + - parameters: + - project_id + - region + - cluster_name + required_fields: + - project_id + - region + - cluster_name + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 60000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.dataproc.v1beta2.DiagnoseClusterResults + implements_delete: true + implements_cancel: false + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 30000 + # The fully qualified name of the API interface. 
+- name: google.cloud.dataproc.v1beta2.JobController + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. + collections: [] + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. + retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 30000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 30000 + total_timeout_millis: 900000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. + # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. + # + # required_fields - Fields that are always required for a request to be + # valid. + # + # request_object_method - Turns on or off the generation of a method whose + # sole parameter is a request object. 
Not all languages will generate this + # method. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. The token_field is the + # name of the field in the response containing the next page token. The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. + # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. 
+ # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. + methods: + - name: SubmitJob + flattening: + groups: + - parameters: + - project_id + - region + - job + required_fields: + - project_id + - region + - job + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: GetJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 10000 + - name: ListJobs + flattening: + groups: + - parameters: + - project_id + - region + required_fields: + - project_id + - region + request_object_method: true + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: jobs + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 30000 + - name: UpdateJob + required_fields: + - project_id + - region + - job_id + - job + - update_mask + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: CancelJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + timeout_millis: 30000 + - name: DeleteJob + flattening: + groups: + - parameters: + - project_id + - region + - job_id + required_fields: + - project_id + - region + - job_id + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 30000 + # The fully qualified name of the API interface. 
+- name: google.cloud.dataproc.v1beta2.WorkflowTemplateService + # A list of resource collection configurations. + # Consists of a name_pattern and an entity_name. + # The name_pattern is a pattern to describe the names of the resources of this + # collection, using the platform's conventions for URI patterns. A generator + # may use this to generate methods to compose and decompose such names. The + # pattern should use named placeholders as in `shelves/{shelf}/books/{book}`; + # those will be taken as hints for the parameter names of the generated + # methods. If empty, no name methods are generated. + # The entity_name is the name to be used as a basis for generated methods and + # classes. + collections: + - name_pattern: projects/{project}/regions/{region} + entity_name: region + - name_pattern: projects/{project}/regions/{region}/workflowTemplates/{workflow_template} + entity_name: workflow_template + # Definition for retryable codes. + retry_codes_def: + - name: idempotent + retry_codes: + - UNAVAILABLE + - DEADLINE_EXCEEDED + - name: non_idempotent + retry_codes: [] + # Definition for retry/backoff parameters. + retry_params_def: + - name: default + initial_retry_delay_millis: 100 + retry_delay_multiplier: 1.3 + max_retry_delay_millis: 60000 + initial_rpc_timeout_millis: 20000 + rpc_timeout_multiplier: 1 + max_rpc_timeout_millis: 20000 + total_timeout_millis: 600000 + # A list of method configurations. + # Common properties: + # + # name - The simple name of the method. + # + # flattening - Specifies the configuration for parameter flattening. + # Describes the parameter groups for which a generator should produce method + # overloads which allow a client to directly pass request message fields as + # method parameters. This information may or may not be used, depending on + # the target language. + # Consists of groups, which each represent a list of parameters to be + # flattened. Each parameter listed must be a field of the request message. 
+ # + # required_fields - Fields that are always required for a request to be + # valid. + # + # request_object_method - Turns on or off the generation of a method whose + # sole parameter is a request object. Not all languages will generate this + # method. + # + # resource_name_treatment - An enum that specifies how to treat the resource + # name formats defined in the field_name_patterns and + # response_field_name_patterns fields. + # UNSET: default value + # NONE: the collection configs will not be used by the generated code. + # VALIDATE: string fields will be validated by the client against the + # specified resource name formats. + # STATIC_TYPES: the client will use generated types for resource names. + # + # page_streaming - Specifies the configuration for paging. + # Describes information for generating a method which transforms a paging + # list RPC into a stream of resources. + # Consists of a request and a response. + # The request specifies request information of the list method. It defines + # which fields match the paging pattern in the request. The request consists + # of a page_size_field and a token_field. The page_size_field is the name of + # the optional field specifying the maximum number of elements to be + # returned in the response. The token_field is the name of the field in the + # request containing the page token. + # The response specifies response information of the list method. It defines + # which fields match the paging pattern in the response. The response + # consists of a token_field and a resources_field. The token_field is the + # name of the field in the response containing the next page token. The + # resources_field is the name of the field in the response containing the + # list of resources belonging to the page. + # + # retry_codes_name - Specifies the configuration for retryable codes. The + # name must be defined in interfaces.retry_codes_def. 
+ # + # retry_params_name - Specifies the configuration for retry/backoff + # parameters. The name must be defined in interfaces.retry_params_def. + # + # field_name_patterns - Maps the field name of the request type to + # entity_name of interfaces.collections. + # Specifies the string pattern that the field must follow. + # + # timeout_millis - Specifies the default timeout for a non-retrying call. If + # the call is retrying, refer to retry_params_name instead. + methods: + - name: CreateWorkflowTemplate + flattening: + groups: + - parameters: + - parent + - template + required_fields: + - parent + - template + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + resource_name_treatment: STATIC_TYPES + field_name_patterns: + parent: region + timeout_millis: 60000 + - name: GetWorkflowTemplate + flattening: + groups: + - parameters: + - name + required_fields: + - name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + resource_name_treatment: STATIC_TYPES + field_name_patterns: + name: workflow_template + timeout_millis: 60000 + - name: InstantiateWorkflowTemplate + flattening: + groups: + - parameters: + - name + - instance_id + required_fields: + - name + request_object_method: true + retry_codes_name: non_idempotent + retry_params_name: default + resource_name_treatment: STATIC_TYPES + field_name_patterns: + name: workflow_template + timeout_millis: 60000 + long_running: + return_type: google.protobuf.Empty + metadata_type: google.cloud.dataproc.v1beta2.WorkflowMetadata + implements_delete: true + implements_cancel: true + initial_poll_delay_millis: 1000 + poll_delay_multiplier: 2 + max_poll_delay_millis: 10000 + total_poll_timeout_millis: 43200000 + - name: UpdateWorkflowTemplate + flattening: + groups: + - parameters: + - template + required_fields: + - template + request_object_method: false + retry_codes_name: idempotent + retry_params_name: default + timeout_millis: 60000 + 
- name: ListWorkflowTemplates + flattening: + groups: + - parameters: + - parent + required_fields: + - parent + request_object_method: true + page_streaming: + request: + page_size_field: page_size + token_field: page_token + response: + token_field: next_page_token + resources_field: templates + retry_codes_name: idempotent + retry_params_name: default + resource_name_treatment: STATIC_TYPES + field_name_patterns: + parent: region + timeout_millis: 60000 + - name: DeleteWorkflowTemplate + flattening: + groups: + - parameters: + - name + required_fields: + - name + request_object_method: true + retry_codes_name: idempotent + retry_params_name: default + resource_name_treatment: STATIC_TYPES + field_name_patterns: + name: workflow_template + timeout_millis: 60000 +resource_name_generation: +- message_name: WorkflowTemplate + field_entity_map: + name: workflow_template +- message_name: CreateWorkflowTemplateRequest + field_entity_map: + parent: region +- message_name: GetWorkflowTemplateRequest + field_entity_map: + name: workflow_template +- message_name: InstantiateWorkflowTemplateRequest + field_entity_map: + name: workflow_template +- message_name: ListWorkflowTemplatesRequest + field_entity_map: + parent: region +- message_name: DeleteWorkflowTemplateRequest + field_entity_map: + name: workflow_template diff --git a/google/cloud/dataproc/v1beta2/jobs.proto b/google/cloud/dataproc/v1beta2/jobs.proto new file mode 100644 index 00000000..3ea965a6 --- /dev/null +++ b/google/cloud/dataproc/v1beta2/jobs.proto @@ -0,0 +1,740 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1beta2; + +import "google/api/annotations.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/field_mask.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "JobsProto"; +option java_package = "com.google.cloud.dataproc.v1beta2"; + + +// The JobController provides methods to manage jobs. +service JobController { + // Submits a job to a cluster. + rpc SubmitJob(SubmitJobRequest) returns (Job) { + option (google.api.http) = { post: "/v1beta2/projects/{project_id}/regions/{region}/jobs:submit" body: "*" }; + } + + // Gets the resource representation for a job in a project. + rpc GetJob(GetJobRequest) returns (Job) { + option (google.api.http) = { get: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}" }; + } + + // Lists regions/{region}/jobs in a project. + rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) { + option (google.api.http) = { get: "/v1beta2/projects/{project_id}/regions/{region}/jobs" }; + } + + // Updates a job in a project. + rpc UpdateJob(UpdateJobRequest) returns (Job) { + option (google.api.http) = { patch: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}" body: "job" }; + } + + // Starts a job cancellation request. 
To access the job resource + // after cancellation, call + // [regions/{region}/jobs.list](/dataproc/docs/reference/rest/v1beta2/projects.regions.jobs/list) or + // [regions/{region}/jobs.get](/dataproc/docs/reference/rest/v1beta2/projects.regions.jobs/get). + rpc CancelJob(CancelJobRequest) returns (Job) { + option (google.api.http) = { post: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" }; + } + + // Deletes the job from the project. If the job is active, the delete fails, + // and the response returns `FAILED_PRECONDITION`. + rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { delete: "/v1beta2/projects/{project_id}/regions/{region}/jobs/{job_id}" }; + } +} + +// The runtime logging config of the job. +message LoggingConfig { + // The Log4j level for job execution. When running an + // [Apache Hive](http://hive.apache.org/) job, Cloud + // Dataproc configures the Hive client to an equivalent verbosity level. + enum Level { + // Level is unspecified. Use default level for log4j. + LEVEL_UNSPECIFIED = 0; + + // Use ALL level for log4j. + ALL = 1; + + // Use TRACE level for log4j. + TRACE = 2; + + // Use DEBUG level for log4j. + DEBUG = 3; + + // Use INFO level for log4j. + INFO = 4; + + // Use WARN level for log4j. + WARN = 5; + + // Use ERROR level for log4j. + ERROR = 6; + + // Use FATAL level for log4j. + FATAL = 7; + + // Turn off log4j. + OFF = 8; + } + + // The per-package log levels for the driver. This may include + // "root" package name to configure rootLogger. + // Examples: + // 'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG' + map<string, Level> driver_log_levels = 2; +} + +// A Cloud Dataproc job for running +// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html) +// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
+message HadoopJob { + // Required. Indicates the location of the driver's main class. Specify + // either the jar file that contains the main class or the main class name. + // To specify both, add the jar file to `jar_file_uris`, and then specify + // the main class name in this property. + oneof driver { + // The HCFS URI of the jar file containing the main class. + // Examples: + // 'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar' + // 'hdfs:/tmp/test-samples/custom-wordcount.jar' + // 'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar' + string main_jar_file_uri = 1; + + // The name of the driver's main class. The jar file containing the class + // must be in the default CLASSPATH or specified in `jar_file_uris`. + string main_class = 2; + } + + // Optional. The arguments to pass to the driver. Do not + // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job + // properties, since a collision may occur that causes an incorrect job + // submission. + repeated string args = 3; + + // Optional. Jar file URIs to add to the CLASSPATHs of the + // Hadoop driver and tasks. + repeated string jar_file_uris = 4; + + // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied + // to the working directory of Hadoop drivers and distributed tasks. Useful + // for naively parallel tasks. + repeated string file_uris = 5; + + // Optional. HCFS URIs of archives to be extracted in the working directory of + // Hadoop drivers and tasks. Supported file types: + // .jar, .tar, .tar.gz, .tgz, or .zip. + repeated string archive_uris = 6; + + // Optional. A mapping of property names to values, used to configure Hadoop. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site and + // classes in user code. + map<string, string> properties = 7; + + // Optional. The runtime log config for job execution.
+ LoggingConfig logging_config = 8; +} + +// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/) +// applications on YARN. +message SparkJob { + // Required. The specification of the main method to call to drive the job. + // Specify either the jar file that contains the main class or the main class + // name. To pass both a main jar and a main class in that jar, add the jar to + // `CommonJob.jar_file_uris`, and then specify the main class name in `main_class`. + oneof driver { + // The HCFS URI of the jar file that contains the main class. + string main_jar_file_uri = 1; + + // The name of the driver's main class. The jar file that contains the class + // must be in the default CLASSPATH or specified in `jar_file_uris`. + string main_class = 2; + } + + // Optional. The arguments to pass to the driver. Do not include arguments, + // such as `--conf`, that can be set as job properties, since a collision may + // occur that causes an incorrect job submission. + repeated string args = 3; + + // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the + // Spark driver and tasks. + repeated string jar_file_uris = 4; + + // Optional. HCFS URIs of files to be copied to the working directory of + // Spark drivers and distributed tasks. Useful for naively parallel tasks. + repeated string file_uris = 5; + + // Optional. HCFS URIs of archives to be extracted in the working directory + // of Spark drivers and tasks. Supported file types: + // .jar, .tar, .tar.gz, .tgz, and .zip. + repeated string archive_uris = 6; + + // Optional. A mapping of property names to values, used to configure Spark. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in + // /etc/spark/conf/spark-defaults.conf and classes in user code. + map<string, string> properties = 7; + + // Optional. The runtime log config for job execution.
+ LoggingConfig logging_config = 8; +} + +// A Cloud Dataproc job for running +// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html) +// applications on YARN. +message PySparkJob { + // Required. The HCFS URI of the main Python file to use as the driver. Must + // be a .py file. + string main_python_file_uri = 1; + + // Optional. The arguments to pass to the driver. Do not include arguments, + // such as `--conf`, that can be set as job properties, since a collision may + // occur that causes an incorrect job submission. + repeated string args = 2; + + // Optional. HCFS file URIs of Python files to pass to the PySpark + // framework. Supported file types: .py, .egg, and .zip. + repeated string python_file_uris = 3; + + // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the + // Python driver and tasks. + repeated string jar_file_uris = 4; + + // Optional. HCFS URIs of files to be copied to the working directory of + // Python drivers and distributed tasks. Useful for naively parallel tasks. + repeated string file_uris = 5; + + // Optional. HCFS URIs of archives to be extracted in the working directory of + // Python drivers and tasks. Supported file types: .jar, .tar, .tar.gz, .tgz, and .zip. + repeated string archive_uris = 6; + + // Optional. A mapping of property names to values, used to configure PySpark. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in + // /etc/spark/conf/spark-defaults.conf and classes in user code. + map<string, string> properties = 7; + + // Optional. The runtime log config for job execution. + LoggingConfig logging_config = 8; +} + +// A list of queries to run on a cluster. +message QueryList { + // Required. The queries to execute. You do not need to terminate a query + // with a semicolon. Multiple queries can be specified in one string + // by separating each with a semicolon.
Here is an example of a Cloud + // Dataproc API snippet that uses a QueryList to specify a HiveJob: + // + // "hiveJob": { + // "queryList": { + // "queries": [ + // "query1", + // "query2", + // "query3;query4", + // ] + // } + // } + repeated string queries = 1; +} + +// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/) +// queries on YARN. +message HiveJob { + // Required. The sequence of Hive queries to execute, specified as either + // an HCFS file URI or a list of queries. + oneof queries { + // The HCFS URI of the script that contains Hive queries. + string query_file_uri = 1; + + // A list of queries. + QueryList query_list = 2; + } + + // Optional. Whether to continue executing queries if a query fails. + // The default value is `false`. Setting to `true` can be useful when executing + // independent parallel queries. + bool continue_on_failure = 3; + + // Optional. Mapping of query variable names to values (equivalent to the + // Hive command: `SET name="value";`). + map<string, string> script_variables = 4; + + // Optional. A mapping of property names and values, used to configure Hive. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, + // /etc/hive/conf/hive-site.xml, and classes in user code. + map<string, string> properties = 5; + + // Optional. HCFS URIs of jar files to add to the CLASSPATH of the + // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes + // and UDFs. + repeated string jar_file_uris = 6; +} + +// A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/) +// queries. +message SparkSqlJob { + // Required. The sequence of Spark SQL queries to execute, specified as + // either an HCFS file URI or as a list of queries. + oneof queries { + // The HCFS URI of the script that contains SQL queries. + string query_file_uri = 1; + + // A list of queries. + QueryList query_list = 2; + } + + // Optional.
Mapping of query variable names to values (equivalent to the + // Spark SQL command: `SET name="value";`). + map<string, string> script_variables = 3; + + // Optional. A mapping of property names to values, used to configure + // Spark SQL's SparkConf. Properties that conflict with values set by the + // Cloud Dataproc API may be overwritten. + map<string, string> properties = 4; + + // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH. + repeated string jar_file_uris = 56; + + // Optional. The runtime log config for job execution. + LoggingConfig logging_config = 6; +} + +// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/) +// queries on YARN. +message PigJob { + // Required. The sequence of Pig queries to execute, specified as an HCFS + // file URI or a list of queries. + oneof queries { + // The HCFS URI of the script that contains the Pig queries. + string query_file_uri = 1; + + // A list of queries. + QueryList query_list = 2; + } + + // Optional. Whether to continue executing queries if a query fails. + // The default value is `false`. Setting to `true` can be useful when executing + // independent parallel queries. + bool continue_on_failure = 3; + + // Optional. Mapping of query variable names to values (equivalent to the Pig + // command: `name=[value]`). + map<string, string> script_variables = 4; + + // Optional. A mapping of property names to values, used to configure Pig. + // Properties that conflict with values set by the Cloud Dataproc API may be + // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml, + // /etc/pig/conf/pig.properties, and classes in user code. + map<string, string> properties = 5; + + // Optional. HCFS URIs of jar files to add to the CLASSPATH of + // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs. + repeated string jar_file_uris = 6; + + // Optional. The runtime log config for job execution. + LoggingConfig logging_config = 7; +} + +// Cloud Dataproc job config. +message JobPlacement { + // Required.
The name of the cluster where the job will be submitted. + string cluster_name = 1; + + // Output-only. A cluster UUID generated by the Cloud Dataproc service when + // the job is submitted. + string cluster_uuid = 2; +} + +// Cloud Dataproc job status. +message JobStatus { + // The job state. + enum State { + // The job state is unknown. + STATE_UNSPECIFIED = 0; + + // The job is pending; it has been submitted, but is not yet running. + PENDING = 1; + + // Job has been received by the service and completed initial setup; + // it will soon be submitted to the cluster. + SETUP_DONE = 8; + + // The job is running on the cluster. + RUNNING = 2; + + // A CancelJob request has been received, but is pending. + CANCEL_PENDING = 3; + + // Transient in-flight resources have been canceled, and the request to + // cancel the running job has been issued to the cluster. + CANCEL_STARTED = 7; + + // The job cancellation was successful. + CANCELLED = 4; + + // The job has completed successfully. + DONE = 5; + + // The job has completed, but encountered an error. + ERROR = 6; + + // Job attempt has failed. The detail field contains failure details for + // this attempt. + // + // Applies to restartable jobs only. + ATTEMPT_FAILURE = 9; + } + + enum Substate { + UNSPECIFIED = 0; + + // The Job is submitted to the agent. + // + // Applies to RUNNING state. + SUBMITTED = 1; + + // The Job has been received and is awaiting execution (it may be waiting + // for a condition to be met). See the "details" field for the reason for + // the delay. + // + // Applies to RUNNING state. + QUEUED = 2; + + // The agent-reported status is out of date, which may be caused by a + // loss of communication between the agent and Cloud Dataproc. If the + // agent does not send a timely update, the job will fail. + // + // Applies to RUNNING state. + STALE_STATUS = 3; + } + + // Output-only. A state message specifying the overall job state. + State state = 1; + + // Output-only. 
Optional job state details, such as an error + // description if the state is ERROR. + string details = 2; + + // Output-only. The time when this state was entered. + google.protobuf.Timestamp state_start_time = 6; + + // Output-only. Additional state information, which includes + // status reported by the agent. + Substate substate = 7; +} + +// Encapsulates the full scoping used to reference a job. +message JobReference { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Optional. The job ID, which must be unique within the project. The job ID + // is generated by the server upon job submission or provided by the user as a + // means to perform retries without creating duplicate jobs. The ID must + // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or + // hyphens (-). The maximum length is 100 characters. + string job_id = 2; +} + +// A YARN application created by a job. Application information is a subset of +// org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto. +// +// **Beta Feature**: This report is available for testing purposes only. It may +// be changed before final release. +message YarnApplication { + // The application state, corresponding to + // YarnProtos.YarnApplicationStateProto. + enum State { + // Status is unspecified. + STATE_UNSPECIFIED = 0; + + // Status is NEW. + NEW = 1; + + // Status is NEW_SAVING. + NEW_SAVING = 2; + + // Status is SUBMITTED. + SUBMITTED = 3; + + // Status is ACCEPTED. + ACCEPTED = 4; + + // Status is RUNNING. + RUNNING = 5; + + // Status is FINISHED. + FINISHED = 6; + + // Status is FAILED. + FAILED = 7; + + // Status is KILLED. + KILLED = 8; + } + + // Required. The application name. + string name = 1; + + // Required. The application state. + State state = 2; + + // Required. The numerical progress of the application, from 1 to 100. + float progress = 3; + + // Optional. 
The HTTP URL of the ApplicationMaster, HistoryServer, or + // TimelineServer that provides application-specific information. The URL uses + // the internal hostname, and requires a proxy server for resolution and, + // possibly, access. + string tracking_url = 4; +} + +// A Cloud Dataproc job resource. +message Job { + // Optional. The fully qualified reference to the job, which can be used to + // obtain the equivalent REST path of the job resource. If this property + // is not specified when a job is created, the server generates a + // job_id. + JobReference reference = 1; + + // Required. Job information, including how, when, and where to + // run the job. + JobPlacement placement = 2; + + // Required. The application/framework-specific portion of the job. + oneof type_job { + // Job is a Hadoop job. + HadoopJob hadoop_job = 3; + + // Job is a Spark job. + SparkJob spark_job = 4; + + // Job is a Pyspark job. + PySparkJob pyspark_job = 5; + + // Job is a Hive job. + HiveJob hive_job = 6; + + // Job is a Pig job. + PigJob pig_job = 7; + + // Job is a SparkSql job. + SparkSqlJob spark_sql_job = 12; + } + + // Output-only. The job status. Additional application-specific + // status information may be contained in the type_job + // and yarn_applications fields. + JobStatus status = 8; + + // Output-only. The previous job status. + repeated JobStatus status_history = 13; + + // Output-only. The collection of YARN applications spun up by this job. + // + // **Beta** Feature: This report is available for testing purposes only. It may + // be changed before final release. + repeated YarnApplication yarn_applications = 9; + + // Output-only. A URI pointing to the location of the stdout of the job's + // driver program. + string driver_output_resource_uri = 17; + + // Output-only. If present, the location of miscellaneous control files + // which may be used as part of job setup and handling. 
If not present, + // control files may be placed in the same location as `driver_output_resource_uri`. + string driver_control_files_uri = 15; + + // Optional. The labels to associate with this job. + // Label **keys** must contain 1 to 63 characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // Label **values** may be empty, but, if present, must contain 1 to 63 + // characters, and must conform to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // No more than 32 labels can be associated with a job. + map<string, string> labels = 18; + + // Optional. Job scheduling configuration. + JobScheduling scheduling = 20; +} + +// Job scheduling options. +// +// **Beta Feature**: These options are available for testing purposes only. +// They may be changed before final release. +message JobScheduling { + // Optional. Maximum number of times per hour a driver may be restarted as + // a result of driver terminating with non-zero code before job is + // reported failed. + // + // A job may be reported as thrashing if driver exits with non-zero code + // 4 times within 10 minute window. + // + // Maximum value is 10. + int32 max_failures_per_hour = 1; +} + +// A request to submit a job. +message SubmitJobRequest { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The job resource. + Job job = 2; +} + +// A request to get the resource representation for a job in a project. +message GetJobRequest { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The job ID. + string job_id = 2; +} + +// A request to list jobs in a project. +message ListJobsRequest { + // A matcher that specifies categories of job states.
+ enum JobStateMatcher { + // Match all jobs, regardless of state. + ALL = 0; + + // Only match jobs in non-terminal states: PENDING, RUNNING, or + // CANCEL_PENDING. + ACTIVE = 1; + + // Only match jobs in terminal states: CANCELLED, DONE, or ERROR. + NON_ACTIVE = 2; + } + + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 6; + + // Optional. The number of results to return in each response. + int32 page_size = 2; + + // Optional. The page token, returned by a previous call, to request the + // next page of results. + string page_token = 3; + + // Optional. If set, the returned jobs list includes only jobs that were + // submitted to the named cluster. + string cluster_name = 4; + + // Optional. Specifies enumerated categories of jobs to list. + // (default = match ALL jobs). + // + // If `filter` is provided, `jobStateMatcher` will be ignored. + JobStateMatcher job_state_matcher = 5; + + // Optional. A filter constraining the jobs to list. Filters are + // case-sensitive and have the following syntax: + // + // [field = value] AND [field [= value]] ... + // + // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label + // key. **value** can be `*` to match all values. + // `status.state` can be either `ACTIVE` or `NON_ACTIVE`. + // Only the logical `AND` operator is supported; space-separated items are + // treated as having an implicit `AND` operator. + // + // Example filter: + // + // status.state = ACTIVE AND labels.env = staging AND labels.starred = * + string filter = 7; +} + +// A request to update a job. +message UpdateJobRequest { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 2; + + // Required. The job ID. 
+ string job_id = 3; + + // Required. The changes to the job. + Job job = 4; + + // Required. Specifies the path, relative to Job, of + // the field to update. For example, to update the labels of a Job the + // update_mask parameter would be specified as + // labels, and the `PATCH` request body would specify the new + // value. Note: Currently, labels is the only + // field that can be updated. + google.protobuf.FieldMask update_mask = 5; +} + +// A list of jobs in a project. +message ListJobsResponse { + // Output-only. Jobs list. + repeated Job jobs = 1; + + // Optional. This token is included in the response if there are more results + // to fetch. To fetch additional results, provide this value as the + // `page_token` in a subsequent ListJobsRequest. + string next_page_token = 2; +} + +// A request to cancel a job. +message CancelJobRequest { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The job ID. + string job_id = 2; +} + +// A request to delete a job. +message DeleteJobRequest { + // Required. The ID of the Google Cloud Platform project that the job + // belongs to. + string project_id = 1; + + // Required. The Cloud Dataproc region in which to handle the request. + string region = 3; + + // Required. The job ID. + string job_id = 2; +} diff --git a/google/cloud/dataproc/v1beta2/operations.proto b/google/cloud/dataproc/v1beta2/operations.proto new file mode 100644 index 00000000..483b09c8 --- /dev/null +++ b/google/cloud/dataproc/v1beta2/operations.proto @@ -0,0 +1,83 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1beta2; + +import "google/api/annotations.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "OperationsProto"; +option java_package = "com.google.cloud.dataproc.v1beta2"; + + +// The status of the operation. +message ClusterOperationStatus { + // The operation state. + enum State { + // Unused. + UNKNOWN = 0; + + // The operation has been created. + PENDING = 1; + + // The operation is running. + RUNNING = 2; + + // The operation is done; either cancelled or completed. + DONE = 3; + } + + // Output-only. The operation state. + State state = 1; + + // Output-only. A message containing the detailed operation state. + string inner_state = 2; + + // Output-only. A message containing any operation metadata details. + string details = 3; + + // Output-only. The time this state was entered. + google.protobuf.Timestamp state_start_time = 4; +} + +// Metadata describing the operation. +message ClusterOperationMetadata { + // Output-only. Name of the cluster for the operation. + string cluster_name = 7; + + // Output-only. Cluster UUID for the operation. + string cluster_uuid = 8; + + // Output-only. Current operation status. + ClusterOperationStatus status = 9; + + // Output-only. The previous operation status. + repeated ClusterOperationStatus status_history = 10; + + // Output-only. The operation type.
+ string operation_type = 11; + + // Output-only. Short description of operation. + string description = 12; + + // Output-only. Labels associated with the operation. + map<string, string> labels = 13; + + // Output-only. Errors encountered during operation execution. + repeated string warnings = 14; +} diff --git a/google/cloud/dataproc/v1beta2/workflow_templates.proto b/google/cloud/dataproc/v1beta2/workflow_templates.proto new file mode 100644 index 00000000..c38d3650 --- /dev/null +++ b/google/cloud/dataproc/v1beta2/workflow_templates.proto @@ -0,0 +1,427 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.dataproc.v1beta2; + +import "google/api/annotations.proto"; +import "google/cloud/dataproc/v1beta2/clusters.proto"; +import "google/cloud/dataproc/v1beta2/jobs.proto"; +import "google/longrunning/operations.proto"; +import "google/protobuf/empty.proto"; +import "google/protobuf/timestamp.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/dataproc/v1beta2;dataproc"; +option java_multiple_files = true; +option java_outer_classname = "WorkflowTemplatesProto"; +option java_package = "com.google.cloud.dataproc.v1beta2"; + + +// The API interface for managing Workflow Templates in the +// Google Cloud Dataproc API. +service WorkflowTemplateService { + // Creates a new workflow template.
+ rpc CreateWorkflowTemplate(CreateWorkflowTemplateRequest) returns (WorkflowTemplate) { + option (google.api.http) = { post: "/v1beta2/{parent=projects/*/regions/*}/workflowTemplates" body: "template" }; + } + + // Retrieves the latest workflow template. + // + // Can retrieve a previously instantiated template by specifying the optional + // version parameter. + rpc GetWorkflowTemplate(GetWorkflowTemplateRequest) returns (WorkflowTemplate) { + option (google.api.http) = { get: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}" }; + } + + // Instantiates a template and begins execution. + // + // The returned Operation can be used to track execution of + // the workflow by polling + // [google.longrunning.Operations.GetOperation][]. + // The Operation will complete when the entire workflow is finished. + // + // The running workflow can be aborted via + // [google.longrunning.Operations.CancelOperation][]. + // + // The [google.longrunning.Operation.metadata][] will always be + // [google.cloud.dataproc.v1beta2.WorkflowMetadata][google.cloud.dataproc.v1beta2.WorkflowMetadata]. + // + // The [google.longrunning.Operation.result][] will always be + // [google.protobuf.Empty][google.protobuf.Empty]. + rpc InstantiateWorkflowTemplate(InstantiateWorkflowTemplateRequest) returns (google.longrunning.Operation) { + option (google.api.http) = { post: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}:instantiate" body: "*" }; + } + + // Updates (replaces) a workflow template. The updated template + // must contain a version that matches the current server version. + rpc UpdateWorkflowTemplate(UpdateWorkflowTemplateRequest) returns (WorkflowTemplate) { + option (google.api.http) = { put: "/v1beta2/{template.name=projects/*/regions/*/workflowTemplates/*}" body: "template" }; + } + + // Lists the workflow templates under the parent region given in the request.
+ rpc ListWorkflowTemplates(ListWorkflowTemplatesRequest) returns (ListWorkflowTemplatesResponse) { + option (google.api.http) = { get: "/v1beta2/{parent=projects/*/regions/*}/workflowTemplates" }; + } + + // Deletes a workflow template. It does not cancel in-progress workflows. + rpc DeleteWorkflowTemplate(DeleteWorkflowTemplateRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { delete: "/v1beta2/{name=projects/*/regions/*/workflowTemplates/*}" }; + } +} + +// A Cloud Dataproc workflow template resource. +message WorkflowTemplate { + // Required. The template id. + string id = 2; + + // Output only. The "resource name" of the template, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` + string name = 1; + + // Optional. Used to perform a consistent read-modify-write. + // + // This field should be left blank for a `CreateWorkflowTemplate` request. It + // is required for an `UpdateWorkflowTemplate` request, and must match the + // current server version. A typical update template flow would fetch the + // current template with a `GetWorkflowTemplate` request, which will return + // the current template with the `version` field filled in with the + // current server version. The user updates other fields in the template, + // then returns it as part of the `UpdateWorkflowTemplate` request. + int32 version = 3; + + // Output only. The time the template was created. + google.protobuf.Timestamp create_time = 4; + + // Output only. The time the template was last updated. + google.protobuf.Timestamp update_time = 5; + + // Optional. The labels to associate with this template. These labels + // will be propagated to all jobs and clusters created by the workflow + // instance. + // + // Label **keys** must contain 1 to 63 characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
+ // + // Label **values** may be empty, but, if present, must contain 1 to 63 + // characters, and must conform to + // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt). + // + // No more than 32 labels can be associated with a template. + map<string, string> labels = 6; + + // Required. WorkflowTemplate scheduling information. + WorkflowTemplatePlacement placement = 7; + + // Required. The Directed Acyclic Graph of Jobs to submit. + repeated OrderedJob jobs = 8; +} + +// Specifies workflow execution target. +// +// Either `managed_cluster` or `cluster_selector` is required. +message WorkflowTemplatePlacement { + oneof placement { + // Optional. A cluster that is managed by the workflow. + ManagedCluster managed_cluster = 1; + + // Optional. A selector that chooses the target cluster for jobs based + // on metadata. + // + // The selector is evaluated at the time each job is submitted. + ClusterSelector cluster_selector = 2; + } +} + +// Cluster that is managed by the workflow. +message ManagedCluster { + // Required. The cluster name. Cluster names within a project must be + // unique. Names from deleted clusters can be reused. + string cluster_name = 2; + + // Required. The cluster configuration. + ClusterConfig config = 3; + + // Optional. The labels to associate with this cluster. + // + // Label keys must be between 1 and 63 characters long, and must conform to + // the following PCRE regular expression: + // [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62} + // + // Label values must be between 1 and 63 characters long, and must conform to + // the following PCRE regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} + // + // No more than 64 labels can be associated with a given cluster. + map<string, string> labels = 4; +} + +// A selector that chooses the target cluster for jobs based on metadata. +message ClusterSelector { + // Optional. The zone where workflow process executes. This parameter does not + // affect the selection of the cluster.
+ // + // If unspecified, the zone of the first cluster matching the selector + // is used. + string zone = 1; + + // Required. The cluster labels. Cluster must have all labels + // to match. + map<string, string> cluster_labels = 2; +} + +message OrderedJob { + // Required. The step id. The id must be unique among all jobs + // within the template. + // + // The step id is used as prefix for job id, as job `workflow-step-id` label, + // and in the `prerequisite_step_ids` field of other steps. + string step_id = 1; + + // Required. The job definition. + oneof job_type { + // Job is a Hadoop job. + HadoopJob hadoop_job = 2; + + // Job is a Spark job. + SparkJob spark_job = 3; + + // Job is a Pyspark job. + PySparkJob pyspark_job = 4; + + // Job is a Hive job. + HiveJob hive_job = 5; + + // Job is a Pig job. + PigJob pig_job = 6; + + // Job is a SparkSql job. + SparkSqlJob spark_sql_job = 7; + } + + // Optional. The labels to associate with this job. + // + // Label keys must be between 1 and 63 characters long, and must conform to + // the following regular expression: + // [\p{Ll}\p{Lo}][\p{Ll}\p{Lo}\p{N}_-]{0,62} + // + // Label values must be between 1 and 63 characters long, and must conform to + // the following regular expression: [\p{Ll}\p{Lo}\p{N}_-]{0,63} + // + // No more than 64 labels can be associated with a given job. + map<string, string> labels = 8; + + // Optional. Job scheduling configuration. + JobScheduling scheduling = 9; + + // Optional. The optional list of prerequisite job step_ids. + // If not specified, the job will start at the beginning of workflow. + repeated string prerequisite_step_ids = 10; +} + +// Metadata describing an instantiated workflow (the Operation metadata). +message WorkflowMetadata { + // The operation state. + enum State { + // Unused. + UNKNOWN = 0; + + // The operation has been created. + PENDING = 1; + + // The operation is running. + RUNNING = 2; + + // The operation is done; either cancelled or completed. + DONE = 3; + } + + // Output only. The "resource name" of the template.
+ string template = 1; + + // Output only. The version of the template at the time of + // workflow instantiation. + int32 version = 2; + + // Output only. The create cluster operation metadata. + ClusterOperation create_cluster = 3; + + // Output only. The workflow graph. + WorkflowGraph graph = 4; + + // Output only. The delete cluster operation metadata. + ClusterOperation delete_cluster = 5; + + // Output only. The workflow state. + State state = 6; + + // Output only. The name of the managed cluster. + string cluster_name = 7; +} + +message ClusterOperation { + // Output only. The id of the cluster operation. + string operation_id = 1; + + // Output only. Error, if the operation failed. + string error = 2; + + // Output only. Indicates the operation is done. + bool done = 3; +} + +// The workflow graph. +message WorkflowGraph { + // Output only. The workflow nodes. + repeated WorkflowNode nodes = 1; +} + +// The workflow node. +message WorkflowNode { + enum NodeState { + NODE_STATUS_UNSPECIFIED = 0; + + // The node is awaiting its prerequisite nodes to finish. + BLOCKED = 1; + + // The node is runnable but not running. + RUNNABLE = 2; + + // The node is running. + RUNNING = 3; + + // The node completed successfully. + COMPLETED = 4; + + // The node failed. A node can be marked FAILED because + // its ancestor or peer failed. + FAILED = 5; + } + + // Output only. The name of the node. + string step_id = 1; + + // Output only. Node's prerequisite nodes. + repeated string prerequisite_step_ids = 2; + + // Output only. The job id; populated after the node enters RUNNING state. + string job_id = 3; + + // Output only. The node state. + NodeState state = 5; + + // Output only. The error detail. + string error = 6; +} + +// A request to create a workflow template. +message CreateWorkflowTemplateRequest { + // Required.
The "resource name" of the region, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}` + string parent = 1; + + // Required. The Dataproc workflow template to create. + WorkflowTemplate template = 2; +} + +// A request to fetch a workflow template. +message GetWorkflowTemplateRequest { + // Required. The "resource name" of the workflow template, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` + string name = 1; + + // Optional. The version of workflow template to retrieve. Only previously + // instantiated versions can be retrieved. + // + // If unspecified, retrieves the current version. + int32 version = 2; +} + +// A request to instantiate a workflow template. +message InstantiateWorkflowTemplateRequest { + // Required. The "resource name" of the workflow template, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` + string name = 1; + + // Optional. The version of workflow template to instantiate. If specified, + // the workflow will be instantiated only if the current version of + // the workflow template has the supplied version. + // + // This option cannot be used to instantiate a previous version of + // the workflow template. + int32 version = 2; + + // Optional. A tag that prevents multiple concurrent workflow + // instances with the same tag from running. This mitigates risk of + // concurrent instances started due to retries. + // + // It is recommended to always set this value to a + // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). + // + // The tag must contain only letters (a-z, A-Z), numbers (0-9), + // underscores (_), and hyphens (-). The maximum length is 40 characters.
+ string instance_id = 3; +} + +// A request to update a workflow template. +message UpdateWorkflowTemplateRequest { + // Required. The updated workflow template. + // + // The `template.version` field must match the current version. + WorkflowTemplate template = 1; +} + +// A request to list workflow templates in a region. +message ListWorkflowTemplatesRequest { + // Required. The "resource name" of the region, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}` + string parent = 1; + + // Optional. The maximum number of results to return in each response. + int32 page_size = 2; + + // Optional. The page token, returned by a previous call, to request the + // next page of results. + string page_token = 3; +} + +// A response to a request to list workflow templates in a region. +message ListWorkflowTemplatesResponse { + // Output only. WorkflowTemplates list. + repeated WorkflowTemplate templates = 1; + + // Output only. This token is included in the response if there are more results + // to fetch. To fetch additional results, provide this value as the + // `page_token` in a subsequent `ListWorkflowTemplatesRequest`. + string next_page_token = 2; +} + +// A request to delete a workflow template. +// +// Currently started workflows will remain running. +message DeleteWorkflowTemplateRequest { + // Required. The "resource name" of the workflow template, as described + // in https://cloud.google.com/apis/design/resource_names of the form + // `projects/{project_id}/regions/{region}/workflowTemplates/{template_id}` + string name = 1; + + // Optional. The version of workflow template to delete. If specified, + // will only delete the template if the current server version matches + // the specified version. + int32 version = 2; +}