Adding Google Cloud Dataproc protos

parent b841cabdd6
commit b307bde9d7

google/cloud/dataproc/v1/clusters.proto
@@ -0,0 +1,443 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";


// The ClusterControllerService provides methods to manage clusters
// of Google Compute Engine instances.
service ClusterController {
  // Creates a cluster in a project.
  rpc CreateCluster(CreateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters" body: "cluster" };
  }

  // Updates a cluster in a project.
  rpc UpdateCluster(UpdateClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" body: "cluster" };
  }

  // Deletes a cluster in a project.
  rpc DeleteCluster(DeleteClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}" };
  }

  // Lists all regions/{region}/clusters in a project.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/clusters" };
  }

  // Gets cluster diagnostic information.
  // After the operation completes, the Operation.response field
  // contains `DiagnoseClusterOutputLocation`.
  rpc DiagnoseCluster(DiagnoseClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose" body: "*" };
  }
}
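
// Illustrative sketch only (not part of the API surface): given the HTTP
// bindings above, CreateCluster reduces to a REST POST whose body is the
// Cluster message. The host, project, region, cluster name, and access token
// below are placeholder assumptions.
//
//     curl -X POST \
//       -H "Authorization: Bearer ${ACCESS_TOKEN}" \
//       -H "Content-Type: application/json" \
//       "https://dataproc.googleapis.com/v1/projects/my-project/regions/global/clusters" \
//       -d '{
//             "projectId": "my-project",
//             "clusterName": "example-cluster",
//             "config": {
//               "gceClusterConfig": {
//                 "zoneUri": "https://www.googleapis.com/compute/v1/projects/my-project/zones/us-east1-a"
//               }
//             }
//           }'
//
// The response is a google.longrunning.Operation whose metadata is the
// ClusterOperationMetadata message defined in operations.proto.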

// Describes the identifying information, config, and status of
// a cluster of Google Compute Engine instances.
message Cluster {
  // [Required] The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1;

  // [Required] The cluster name. Cluster names within a project must be
  // unique. Names of deleted clusters can be reused.
  string cluster_name = 2;

  // [Required] The cluster config. Note that Cloud Dataproc may set
  // default values, and values may change when clusters are updated.
  ClusterConfig config = 3;

  // [Output-only] Cluster status.
  ClusterStatus status = 4;

  // [Output-only] The previous cluster status.
  repeated ClusterStatus status_history = 7;

  // [Output-only] A cluster UUID (Unique Universal Identifier). Cloud Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6;
}

// The cluster config.
message ClusterConfig {
  // [Optional] A Google Cloud Storage staging bucket used for sharing generated
  // SSH keys and config. If you do not specify a staging bucket, Cloud
  // Dataproc will determine an appropriate Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the Google
  // Compute Engine zone where your cluster is deployed, and then it will create
  // and manage this project-level, per-location bucket for you.
  string config_bucket = 1;

  // [Required] The shared Google Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8;

  // [Optional] The Google Compute Engine config settings for
  // the master instance in a cluster.
  InstanceGroupConfig master_config = 9;

  // [Optional] The Google Compute Engine config settings for
  // worker instances in a cluster.
  InstanceGroupConfig worker_config = 10;

  // [Optional] The Google Compute Engine config settings for
  // additional worker instances in a cluster.
  InstanceGroupConfig secondary_worker_config = 12;

  // [Optional] The config settings for software inside the cluster.
  SoftwareConfig software_config = 13;

  // [Optional] Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's <code>role</code> metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11;
}

// Common config settings for resources of Google Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // [Required] The zone where the Google Compute Engine cluster will be located.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`.
  string zone_uri = 1;

  // [Optional] The Google Compute Engine network to be used for machine
  // communications. Cannot be specified with subnetwork_uri. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](/compute/docs/subnetworks) for more information).
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/global/default`.
  string network_uri = 2;

  // [Optional] The Google Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with network_uri.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/us-east1/sub0`.
  string subnetwork_uri = 6;

  // [Optional] If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP addresses,
  // and will have ephemeral external IP addresses assigned to each instance.
  // This `internal_ip_only` restriction can only be enabled for subnetwork
  // enabled networks, and all off-cluster dependencies must be configured to be
  // accessible without external IP addresses.
  bool internal_ip_only = 7;

  // [Optional] The URIs of service account scopes to be included in Google
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3;

  // The Google Compute Engine tags to add to all instances (see
  // [Labeling instances](/compute/docs/label-or-tag-resources#labeling_instances)).
  repeated string tags = 4;

  // The Google Compute Engine metadata entries to add to all instances (see
  // [Project and instance metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;
}

// [Optional] The config settings for Google Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // [Required] The number of VM instances in the instance group.
  // For master instance groups, must be set to 1.
  int32 num_instances = 1;

  // [Optional] The list of instance names. Cloud Dataproc derives the names from
  // `cluster_name`, `num_instances`, and the instance group if not set by user
  // (recommended practice is to let Cloud Dataproc derive the name).
  repeated string instance_names = 2;

  // [Output-only] The Google Compute Engine image resource used for cluster
  // instances. Inferred from `SoftwareConfig.image_version`.
  string image_uri = 3;

  // [Required] The Google Compute Engine machine type used for cluster instances.
  // Example: `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/us-east1-a/machineTypes/n1-standard-2`.
  string machine_type_uri = 4;

  // [Optional] Disk option config settings.
  DiskConfig disk_config = 5;

  // [Optional] Specifies that this instance group contains preemptible instances.
  bool is_preemptible = 6;

  // [Output-only] The config for Google Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7;
}

// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // [Output-only] The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1;

  // [Output-only] The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // [Optional] Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1;

  // [Optional] Number of attached SSDs, from 0 to 4 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  int32 num_local_ssds = 2;
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // [Required] Google Cloud Storage URI of executable file.
  string executable_file = 1;

  // [Optional] Amount of time executable has to complete. Default is
  // 10 minutes. Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable is not completed at end of the timeout period.
  google.protobuf.Duration execution_timeout = 2;
}
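
// For illustration (bucket and file names are placeholders): in the proto3
// JSON mapping, `execution_timeout` is a google.protobuf.Duration encoded as a
// string of seconds, so a hypothetical action allowed five minutes looks like:
//
//     "initializationActions": [
//       {
//         "executableFile": "gs://my-bucket/actions/install-deps.sh",
//         "executionTimeout": "300s"
//       }
//     ]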

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;
  }

  // [Output-only] The cluster's state.
  State state = 1;

  // [Output-only] Optional details of cluster's state.
  string detail = 2;

  // [Output-only] Time when this state was entered.
  google.protobuf.Timestamp state_start_time = 3;
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // [Optional] The version of software inside the cluster. It must match the
  // regular expression `[0-9]+\.[0-9]+`. If unspecified, it defaults to the
  // latest version (see [Cloud Dataproc Versioning](/dataproc/versioning)).
  string image_version = 1;

  // [Optional] The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, such as
  // `core:fs.defaultFS`. The following are supported prefixes
  // and their mappings:
  //
  // * core: `core-site.xml`
  // * hdfs: `hdfs-site.xml`
  // * mapred: `mapred-site.xml`
  // * yarn: `yarn-site.xml`
  // * hive: `hive-site.xml`
  // * pig: `pig.properties`
  // * spark: `spark-defaults.conf`
  map<string, string> properties = 2;
}

// A request to create a cluster.
message CreateClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster to create.
  Cluster cluster = 2;
}

// A request to update a cluster.
message UpdateClusterRequest {
  // [Required] The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 5;

  // [Required] The cluster name.
  string cluster_name = 2;

  // [Required] The changes to the cluster.
  Cluster cluster = 3;

  // [Required] Specifies the path, relative to <code>Cluster</code>, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the <code>update_mask</code> parameter would be
  // specified as <code>config.worker_config.num_instances</code>,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5, the
  // <code>update_mask</code> parameter would be <code>config.secondary_worker_config.num_instances</code>,
  // and the `PATCH` request body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // <strong>Note:</strong> Currently, <code>config.worker_config.num_instances</code>
  // and <code>config.secondary_worker_config.num_instances</code> are the only
  // fields that can be updated.
  google.protobuf.FieldMask update_mask = 4;
}
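
// Illustrative sketch: because the HTTP binding for UpdateCluster places only
// `cluster` in the request body, `update_mask` travels as a query parameter in
// REST calls (an assumption based on the standard HTTP mapping; all names and
// the token below are placeholders). Resizing the worker group to 5:
//
//     curl -X PATCH \
//       -H "Authorization: Bearer ${ACCESS_TOKEN}" \
//       -H "Content-Type: application/json" \
//       "https://dataproc.googleapis.com/v1/projects/my-project/regions/global/clusters/example-cluster?updateMask=config.worker_config.num_instances" \
//       -d '{ "config": { "workerConfig": { "numInstances": "5" } } }'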

// A request to delete a cluster.
message DeleteClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// A request to list the clusters in a project.
message ListClustersRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 4;

  // [Optional] The standard List page size.
  int32 page_size = 2;

  // [Optional] The standard List page token.
  string page_token = 3;
}

// The list of all clusters in a project.
message ListClustersResponse {
  // [Output-only] The clusters in the project.
  repeated Cluster clusters = 1;

  // [Output-only] This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListClustersRequest</code>.
  string next_page_token = 2;
}
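
// Pagination sketch (all values are placeholders): when a response includes
// `nextPageToken`, pass it back as `pageToken` to fetch the next page, and
// stop once the field is absent:
//
//     GET /v1/projects/my-project/regions/global/clusters?pageSize=50
//       -> { "clusters": [ ... ], "nextPageToken": "<token>" }
//     GET /v1/projects/my-project/regions/global/clusters?pageSize=50&pageToken=<token>
//       -> { "clusters": [ ... ] }   // no nextPageToken: last page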

// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // [Required] The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The cluster name.
  string cluster_name = 2;
}

// The location of diagnostic output.
message DiagnoseClusterResults {
  // [Output-only] The Google Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of collected
  // diagnostics.
  string output_uri = 1;
}

google/cloud/dataproc/v1/jobs.proto
@@ -0,0 +1,572 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";


// The JobController provides methods to manage jobs.
service JobController {
  // Submits a job to a cluster.
  rpc SubmitJob(SubmitJobRequest) returns (Job) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs:submit" body: "*" };
  }

  // Gets the resource representation for a job in a project.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
  }

  // Lists regions/{region}/jobs in a project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = { get: "/v1/projects/{project_id}/regions/{region}/jobs" };
  }

  // Starts a job cancellation request. To access the job resource
  // after cancellation, call
  // [regions/{region}/jobs.list](/dataproc/reference/rest/v1/projects.regions.jobs/list) or
  // [regions/{region}/jobs.get](/dataproc/reference/rest/v1/projects.regions.jobs/get).
  rpc CancelJob(CancelJobRequest) returns (Job) {
    option (google.api.http) = { post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel" body: "*" };
  }

  // Deletes the job from the project. If the job is active, the delete fails,
  // and the response returns `FAILED_PRECONDITION`.
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = { delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}" };
  }
}
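
// Illustrative sketch (identifiers are placeholders): with `body: "*"`, the
// SubmitJob POST carries the remaining SubmitJobRequest field, the Job, in the
// request body:
//
//     POST /v1/projects/my-project/regions/global/jobs:submit
//     {
//       "job": {
//         "placement": { "clusterName": "example-cluster" },
//         "hadoopJob": {
//           "mainJarFileUri": "gs://my-bucket/wordcount.jar",
//           "args": ["gs://my-bucket/input/", "gs://my-bucket/output/"]
//         }
//       }
//     }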

// The runtime logging config of the job.
message LoggingConfig {
  // The Log4j level for job execution. When running an
  // [Apache Hive](http://hive.apache.org/) job, Cloud
  // Dataproc configures the Hive client to an equivalent verbosity level.
  enum Level {
    // Level is unspecified. Use default level for log4j.
    LEVEL_UNSPECIFIED = 0;

    // Use ALL level for log4j.
    ALL = 1;

    // Use TRACE level for log4j.
    TRACE = 2;

    // Use DEBUG level for log4j.
    DEBUG = 3;

    // Use INFO level for log4j.
    INFO = 4;

    // Use WARN level for log4j.
    WARN = 5;

    // Use ERROR level for log4j.
    ERROR = 6;

    // Use FATAL level for log4j.
    FATAL = 7;

    // Turn off log4j.
    OFF = 8;
  }

  // The per-package log levels for the driver. This may include
  // "root" package name to configure rootLogger.
  // Examples:
  //   'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
  map<string, Level> driver_log_levels = 2;
}

// A Cloud Dataproc job for running
// [Apache Hadoop MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
  // [Required] Indicates the location of the driver's main class. Specify
  // either the jar file that contains the main class or the main class name.
  // To specify both, add the jar file to `jar_file_uris`, and then specify
  // the main class name in this property.
  oneof driver {
    // The HCFS URI of the jar file containing the main class.
    // Examples:
    //   'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
    //   'hdfs:/tmp/test-samples/custom-wordcount.jar'
    //   'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file containing the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // [Optional] The arguments to pass to the driver. Do not
  // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as job
  // properties, since a collision may occur that causes an incorrect job
  // submission.
  repeated string args = 3;

  // [Optional] Jar file URIs to add to the CLASSPATHs of the
  // Hadoop driver and tasks.
  repeated string jar_file_uris = 4;

  // [Optional] HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
  // to the working directory of Hadoop drivers and distributed tasks. Useful
  // for naively parallel tasks.
  repeated string file_uris = 5;

  // [Optional] HCFS URIs of archives to be extracted in the working directory of
  // Hadoop drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, or .zip.
  repeated string archive_uris = 6;

  // [Optional] A mapping of property names to values, used to configure Hadoop.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
  // classes in user code.
  map<string, string> properties = 7;

  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

// A Cloud Dataproc job for running [Apache Spark](http://spark.apache.org/)
// applications on YARN.
message SparkJob {
  // [Required] The specification of the main method to call to drive the job.
  // Specify either the jar file that contains the main class or the main class
  // name. To pass both a main jar and a main class in that jar, add the jar to
  // `CommonJob.jar_file_uris`, and then specify the main class name in `main_class`.
  oneof driver {
    // The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file that contains the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // [Optional] The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 3;

  // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4;

  // [Optional] HCFS URIs of files to be copied to the working directory of
  // Spark drivers and distributed tasks. Useful for naively parallel tasks.
  repeated string file_uris = 5;

  // [Optional] HCFS URIs of archives to be extracted in the working directory
  // of Spark drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6;

  // [Optional] A mapping of property names to values, used to configure Spark.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7;

  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

// A Cloud Dataproc job for running
// [Apache PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
  // [Required] The HCFS URI of the main Python file to use as the driver. Must
  // be a .py file.
  string main_python_file_uri = 1;

  // [Optional] The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2;

  // [Optional] HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: .py, .egg, and .zip.
  repeated string python_file_uris = 3;

  // [Optional] HCFS URIs of jar files to add to the CLASSPATHs of the
  // Python driver and tasks.
  repeated string jar_file_uris = 4;

  // [Optional] HCFS URIs of files to be copied to the working directory of
  // Python drivers and distributed tasks. Useful for naively parallel tasks.
  repeated string file_uris = 5;

  // [Optional] HCFS URIs of archives to be extracted in the working directory of
  // Python drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6;

  // [Optional] A mapping of property names to values, used to configure PySpark.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7;

  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 8;
}

// A list of queries to run on a cluster.
message QueryList {
  // [Required] The queries to execute. You do not need to terminate a query
  // with a semicolon. Multiple queries can be specified in one string
  // by separating each with a semicolon. Here is an example of a Cloud
  // Dataproc API snippet that uses a QueryList to specify a HiveJob:
  //
  //     "hiveJob": {
  //       "queryList": {
  //         "queries": [
  //           "query1",
  //           "query2",
  //           "query3;query4",
  //         ]
  //       }
  //     }
  repeated string queries = 1;
}

// A Cloud Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
  // [Required] The sequence of Hive queries to execute, specified as either
  // an HCFS file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains Hive queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // [Optional] Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when executing
  // independent parallel queries.
  bool continue_on_failure = 3;

  // [Optional] Mapping of query variable names to values (equivalent to the
  // Hive command: `SET name="value";`).
  map<string, string> script_variables = 4;

  // [Optional] A mapping of property names and values, used to configure Hive.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/hive/conf/hive-site.xml, and classes in user code.
  map<string, string> properties = 5;

  // [Optional] HCFS URIs of jar files to add to the CLASSPATH of the
  // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
  // and UDFs.
  repeated string jar_file_uris = 6;
}
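
// For illustration, the QueryList idiom above embedded in a full SubmitJob
// body (the cluster name and query are placeholders):
//
//     "job": {
//       "placement": { "clusterName": "example-cluster" },
//       "hiveJob": {
//         "queryList": { "queries": ["SHOW TABLES;"] },
//         "continueOnFailure": false
//       }
//     }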

// A Cloud Dataproc job for running [Apache Spark SQL](http://spark.apache.org/sql/)
// queries.
message SparkSqlJob {
  // [Required] The sequence of Spark SQL queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // [Optional] Mapping of query variable names to values (equivalent to the
  // Spark SQL command: SET `name="value";`).
  map<string, string> script_variables = 3;

  // [Optional] A mapping of property names to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Cloud Dataproc API may be overwritten.
  map<string, string> properties = 4;

  // [Optional] HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 56;

  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 6;
}

// A Cloud Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // [Required] The sequence of Pig queries to execute, specified as an HCFS
  // file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains the Pig queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // [Optional] Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when executing
  // independent parallel queries.
  bool continue_on_failure = 3;

  // [Optional] Mapping of query variable names to values (equivalent to the Pig
  // command: `name=[value]`).
  map<string, string> script_variables = 4;

  // [Optional] A mapping of property names to values, used to configure Pig.
  // Properties that conflict with values set by the Cloud Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5;

  // [Optional] HCFS URIs of jar files to add to the CLASSPATH of
  // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6;

  // [Optional] The runtime log config for job execution.
  LoggingConfig logging_config = 7;
}

// Cloud Dataproc job config.
message JobPlacement {
  // [Required] The name of the cluster where the job will be submitted.
  string cluster_name = 1;

  // [Output-only] A cluster UUID generated by the Cloud Dataproc service when
  // the job is submitted.
  string cluster_uuid = 2;
}

// Cloud Dataproc job status.
message JobStatus {
  // The job state.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is pending; it has been submitted, but is not yet running.
    PENDING = 1;

    // Job has been received by the service and completed initial setup;
    // it will soon be submitted to the cluster.
    SETUP_DONE = 8;

    // The job is running on the cluster.
    RUNNING = 2;

    // A CancelJob request has been received, but is pending.
    CANCEL_PENDING = 3;

    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;

    // The job cancellation was successful.
    CANCELLED = 4;

    // The job has completed successfully.
    DONE = 5;

    // The job has completed, but encountered an error.
    ERROR = 6;
  }

  // [Output-only] A state message specifying the overall job state.
  State state = 1;

  // [Output-only] Optional job state details, such as an error
  // description if the state is <code>ERROR</code>.
  string details = 2;

  // [Output-only] The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 6;
}
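
// Polling sketch (identifiers are placeholders): clients typically poll GetJob
// until `status.state` reaches a terminal value (CANCELLED, DONE, or ERROR):
//
//     GET /v1/projects/my-project/regions/global/jobs/my-job-id
//       -> { "status": { "state": "RUNNING" }, ... }   // keep polling
//       -> { "status": { "state": "DONE" }, ... }      // terminal; stop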

// Encapsulates the full scoping used to reference a job.
message JobReference {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Optional] The job ID, which must be unique within the project. The job ID
  // is generated by the server upon job submission or provided by the user as a
  // means to perform retries without creating duplicate jobs. The ID must
  // contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or
  // hyphens (-). The maximum length is 512 characters.
  string job_id = 2;
}
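
// Illustration of the retry idiom described above (the ID is a placeholder):
// a client that supplies its own `job_id` can resubmit after a timeout without
// creating a duplicate job, because both attempts name the same job:
//
//     "job": {
//       "reference": { "jobId": "wordcount-20160601-001" },
//       ...
//     }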

// A Cloud Dataproc job resource.
message Job {
  // [Optional] The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1;

  // [Required] Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2;

  // [Required] The application/framework-specific portion of the job.
  oneof type_job {
    // Job is a Hadoop job.
    HadoopJob hadoop_job = 3;

    // Job is a Spark job.
    SparkJob spark_job = 4;

    // Job is a Pyspark job.
    PySparkJob pyspark_job = 5;

    // Job is a Hive job.
    HiveJob hive_job = 6;

    // Job is a Pig job.
    PigJob pig_job = 7;

    // Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12;
  }

  // [Output-only] The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8;

  // [Output-only] The previous job status.
  repeated JobStatus status_history = 13;

  // [Output-only] A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17;

  // [Output-only] If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as `driver_output_uri`.
  string driver_control_files_uri = 15;
}

// A request to submit a job.
message SubmitJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The job resource.
  Job job = 2;
}

// A request to get the resource representation for a job in a project.
message GetJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The job ID.
  string job_id = 2;
}

// A request to list jobs in a project.
message ListJobsRequest {
  // A matcher that specifies categories of job states.
  enum JobStateMatcher {
    // Match all jobs, regardless of state.
    ALL = 0;

    // Only match jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;

    // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }

  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 6;

  // [Optional] The number of results to return in each response.
  int32 page_size = 2;

  // [Optional] The page token, returned by a previous call, to request the
  // next page of results.
  string page_token = 3;

  // [Optional] If set, the returned jobs list includes only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4;

  // [Optional] Specifies enumerated categories of jobs to list
  // (default = match ALL jobs).
  JobStateMatcher job_state_matcher = 5;
}

// A list of jobs in a project.
message ListJobsResponse {
  // [Output-only] Jobs list.
  repeated Job jobs = 1;

  // [Optional] This token is included in the response if there are more results
  // to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2;
}
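
// Illustrative query (values are placeholders; query parameter names assume
// the standard camelCase REST mapping): list only active jobs submitted to a
// particular cluster, 25 per page:
//
//     GET /v1/projects/my-project/regions/global/jobs?clusterName=example-cluster&jobStateMatcher=ACTIVE&pageSize=25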

// A request to cancel a job.
message CancelJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The job ID.
  string job_id = 2;
}

// A request to delete a job.
message DeleteJobRequest {
  // [Required] The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1;

  // [Required] The Cloud Dataproc region in which to handle the request.
  string region = 3;

  // [Required] The job ID.
  string job_id = 2;
}

google/cloud/dataproc/v1/operations.proto
@@ -0,0 +1,78 @@
// Copyright 2016 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option java_multiple_files = true;
option java_outer_classname = "OperationsProto";
option java_package = "com.google.cloud.dataproc.v1";


// The status of the operation.
message ClusterOperationStatus {
  // The operation state.
  enum State {
    // Unused.
    UNKNOWN = 0;

    // The operation has been created.
    PENDING = 1;

    // The operation is running.
    RUNNING = 2;

    // The operation is done; either cancelled or completed.
    DONE = 3;
  }

  // [Output-only] A message containing the operation state.
  State state = 1;

  // [Output-only] A message containing the detailed operation state.
  string inner_state = 2;

  // [Output-only] A message containing any operation metadata details.
  string details = 3;

  // [Output-only] The time this state was entered.
  google.protobuf.Timestamp state_start_time = 4;
}

// Metadata describing the operation.
message ClusterOperationMetadata {
  // [Output-only] Name of the cluster for the operation.
  string cluster_name = 7;

  // [Output-only] Cluster UUID for the operation.
  string cluster_uuid = 8;

  // [Output-only] Current operation status.
  ClusterOperationStatus status = 9;

  // [Output-only] The previous operation status.
  repeated ClusterOperationStatus status_history = 10;

  // [Output-only] The operation type.
  string operation_type = 11;

  // [Output-only] Short description of operation.
  string description = 12;
}
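
// Illustrative sketch (names and IDs are placeholders; the response shape
// assumes the standard google.longrunning JSON mapping): the Operation
// returned by the cluster RPCs embeds this metadata, so a client can poll the
// operation and watch the status move through the states above:
//
//     GET /v1/projects/my-project/regions/global/operations/abcd1234
//       -> {
//            "name": "projects/my-project/regions/global/operations/abcd1234",
//            "metadata": {
//              "@type": "type.googleapis.com/google.cloud.dataproc.v1.ClusterOperationMetadata",
//              "clusterName": "example-cluster",
//              "status": { "state": "RUNNING" }
//            },
//            "done": false
//          }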