gigl.src.common.utils.spark_job_manager#

Attributes#

  • IDLE_TTL_DEFAULT_S

  • IDLE_TTL_DEV_DEFAULT_S

  • logger

Classes#

  • DataprocClusterInitData

  • SparkJobManager

Module Contents#

class gigl.src.common.utils.spark_job_manager.DataprocClusterInitData[source]#
cluster_name: str[source]#
debug_cluster_owner_alias: str | None = None[source]#
init_script_uri: gigl.common.GcsUri | None = None[source]#
is_debug_mode: bool[source]#
labels: Dict[str, str] | None = None[source]#
machine_type: str[source]#
num_local_ssds: int[source]#
num_workers: int[source]#
project: str[source]#
region: str[source]#
service_account: str[source]#
temp_assets_bucket: str[source]#
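
A minimal construction sketch. It assumes DataprocClusterInitData accepts its fields as keyword arguments (e.g. as a dataclass); every value shown is a hypothetical placeholder.

    from gigl.src.common.utils.spark_job_manager import DataprocClusterInitData

    # All values below are hypothetical placeholders for illustration only.
    cluster_init_data = DataprocClusterInitData(
        project="my-gcp-project",
        region="us-central1",
        service_account="spark-runner@my-gcp-project.iam.gserviceaccount.com",
        cluster_name="gigl-spark-cluster",
        machine_type="n2-highmem-16",
        temp_assets_bucket="my-temp-assets-bucket",
        num_workers=4,
        num_local_ssds=2,
        is_debug_mode=False,
    )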
class gigl.src.common.utils.spark_job_manager.SparkJobManager(project, region, cluster_name)[source]#
Parameters:
  • project (str)

  • region (str)

  • cluster_name (str)
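
A minimal instantiation sketch; the project, region, and cluster name are hypothetical placeholders.

    from gigl.src.common.utils.spark_job_manager import SparkJobManager

    # Hypothetical project/region/cluster values.
    manager = SparkJobManager(
        project="my-gcp-project",
        region="us-central1",
        cluster_name="gigl-spark-cluster",
    )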

create_dataproc_cluster(cluster_init_data, use_spark35=False)[source]#
Parameters:
  • cluster_init_data (DataprocClusterInitData)

  • use_spark35 (bool)
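
A usage sketch, reusing the hypothetical manager and cluster_init_data objects from the sketches above.

    # Provision the cluster described by cluster_init_data; use_spark35
    # presumably opts into a Spark 3.5 runtime (hypothetical example values).
    manager.create_dataproc_cluster(
        cluster_init_data=cluster_init_data,
        use_spark35=True,
    )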
delete_cluster()[source]#
static get_sanitized_dataproc_cluster_name(cluster_name)[source]#
Parameters:
  • cluster_name (str)
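
A sketch of calling the static helper on a hypothetical raw name; the exact sanitization rules are not documented here, but the result is expected to conform to Dataproc cluster-naming constraints.

    from gigl.src.common.utils.spark_job_manager import SparkJobManager

    # Hypothetical raw name; the helper returns a Dataproc-safe variant.
    sanitized_name = SparkJobManager.get_sanitized_dataproc_cluster_name(
        "My_Experimental_Cluster"
    )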

submit_and_wait_scala_spark_job(main_jar_file_uri, max_job_duration, runtime_args=[], extra_jar_file_uris=[], use_spark35=False)[source]#
Parameters:
  • main_jar_file_uri (gigl.common.Uri)

  • max_job_duration (datetime.timedelta)

  • runtime_args (List[str])

  • extra_jar_file_uris (List[str])

  • use_spark35 (bool)
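
A submission sketch, reusing the hypothetical manager from the sketches above; the jar URIs and arguments are placeholders, and it assumes gigl.common.GcsUri can be constructed directly from a gs:// string.

    from datetime import timedelta

    from gigl.common import GcsUri

    # Hypothetical jar locations and runtime arguments.
    manager.submit_and_wait_scala_spark_job(
        main_jar_file_uri=GcsUri("gs://my-bucket/jars/my_scala_spark_job.jar"),
        max_job_duration=timedelta(hours=2),
        runtime_args=["--inputPath=gs://my-bucket/input"],
        extra_jar_file_uris=["gs://my-bucket/jars/extra_dependency.jar"],
        use_spark35=True,
    )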

gigl.src.common.utils.spark_job_manager.IDLE_TTL_DEFAULT_S = 600[source]#
gigl.src.common.utils.spark_job_manager.IDLE_TTL_DEV_DEFAULT_S = 36000[source]#
gigl.src.common.utils.spark_job_manager.logger[source]#