Source code for gigl.experimental.knowledge_graph_embedding.lib.constants.gcs
import gigl.src.common.constants.gcs as gcs_constants
from gigl.common import GcsUri
from gigl.src.common.types import AppliedTaskIdentifier
from gigl.src.common.types.graph_data import EdgeType
[docs]
def get_applied_task_staging_path(
    applied_task_identifier: AppliedTaskIdentifier,
) -> GcsUri:
    """
    Resolve the staging/temp GCS location of an applied task.

    This is a thin wrapper that delegates to
    ``gcs_constants.get_applied_task_temp_regional_gcs_path``.

    Args:
        applied_task_identifier: The identifier of the applied task.
    Returns:
        GcsUri: The GCS URI for the staging path of the applied task.
    """
    staging_path = gcs_constants.get_applied_task_temp_regional_gcs_path(
        applied_task_identifier=applied_task_identifier
    )
    return staging_path
[docs]
def get_edge_dataset_output_path(
    applied_task_identifier: AppliedTaskIdentifier,
) -> GcsUri:
    """
    Build the GCS URI that edge data is written to (and later read from
    during training).

    Args:
        applied_task_identifier: The identifier of the applied task.
    Returns:
        GcsUri: The ``edge_dataset`` location under the applied task's
        staging path.
    """
    staging_root = get_applied_task_staging_path(
        applied_task_identifier=applied_task_identifier
    )
    return GcsUri.join(staging_root, "edge_dataset")
[docs]
def get_embedding_output_path(applied_task_identifier: AppliedTaskIdentifier) -> GcsUri:
    """
    Build the GCS URI that embeddings are written to.

    Args:
        applied_task_identifier: The identifier of the applied task.
    Returns:
        GcsUri: The ``embeddings`` location under the applied task's
        staging path.
    """
    staging_root = get_applied_task_staging_path(
        applied_task_identifier=applied_task_identifier
    )
    return GcsUri.join(staging_root, "embeddings")
[docs]
def get_embedding_output_path_for_edge_type(
    applied_task_identifier: AppliedTaskIdentifier,
    edge_type: EdgeType,
) -> GcsUri:
    """
    Build the GCS URI under which embeddings for one edge type are written.

    The edge type is mapped to a single path component of the form
    ``<src_node_type>_<relation>_<dst_node_type>`` placed under the
    embedding output path of the applied task.

    Args:
        applied_task_identifier: The identifier of the applied task.
        edge_type: The edge type for which to get the embedding output path.
    Returns:
        GcsUri: The GCS URI for the embedding output path for the specified edge type.
    """
    embeddings_root = get_embedding_output_path(
        applied_task_identifier=applied_task_identifier
    )
    # One directory per edge type, named after its (src, relation, dst) triple.
    edge_type_dirname = (
        f"{edge_type.src_node_type}_{edge_type.relation}_{edge_type.dst_node_type}"
    )
    return GcsUri.join(embeddings_root, edge_type_dirname)
[docs]
def get_embedding_output_path_for_src_node(
    applied_task_identifier: AppliedTaskIdentifier, edge_type: EdgeType
) -> GcsUri:
    """
    Returns the GCS URI for the source-node embedding output of a specific edge type.

    The result is the ``src_embeddings`` location under the edge type's
    embedding output path (see ``get_embedding_output_path_for_edge_type``).

    Args:
        applied_task_identifier: The identifier of the applied task.
        edge_type: The edge type for which to get the embedding output path.
    Returns:
        GcsUri: The GCS URI for the source-node embeddings of the specified edge type.
    """
    # Reuse the per-edge-type helper so the directory naming scheme is
    # defined in exactly one place.
    return GcsUri.join(
        get_embedding_output_path_for_edge_type(
            applied_task_identifier=applied_task_identifier,
            edge_type=edge_type,
        ),
        "src_embeddings",
    )
[docs]
def get_embedding_output_path_for_dst_node(
    applied_task_identifier: AppliedTaskIdentifier, edge_type: EdgeType
) -> GcsUri:
    """
    Returns the GCS URI for the destination-node embedding output of a specific edge type.

    The result is the ``dst_embeddings`` location under the edge type's
    embedding output path (see ``get_embedding_output_path_for_edge_type``).

    Args:
        applied_task_identifier: The identifier of the applied task.
        edge_type: The edge type for which to get the embedding output path.
    Returns:
        GcsUri: The GCS URI for the destination-node embeddings of the specified edge type.
    """
    # Reuse the per-edge-type helper so the directory naming scheme is
    # defined in exactly one place.
    return GcsUri.join(
        get_embedding_output_path_for_edge_type(
            applied_task_identifier=applied_task_identifier,
            edge_type=edge_type,
        ),
        "dst_embeddings",
    )
[docs]
def get_enumerated_config_output_path(
    applied_task_identifier: AppliedTaskIdentifier,
) -> GcsUri:
    """
    Build the GCS URI of the config to be run post enumeration of data.

    Args:
        applied_task_identifier: The identifier of the applied task.
    Returns:
        GcsUri: The ``post_enumeration_config.yaml`` location under the
        applied task's staging path.
    """
    staging_root = get_applied_task_staging_path(
        applied_task_identifier=applied_task_identifier
    )
    return GcsUri.join(staging_root, "post_enumeration_config.yaml")