Source code for gigl.src.data_preprocessor.lib.enumerate.queries
[docs]
DEFAULT_ORIGINAL_NODE_ID_FIELD = "node_id" 
[docs]
DEFAULT_ENUMERATED_NODE_ID_FIELD = "int_id" 
[docs]
UNIQUE_NODE_ENUMERATION_QUERY = """
WITH
  unique_nodes AS (
    SELECT DISTINCT {bq_source_table_node_id_col_name} as {original_node_id_field} FROM `{bq_source_table_name}`
  )
SELECT
  {original_node_id_field},
  ROW_NUMBER() OVER(ORDER BY {original_node_id_field}) - 1 AS {enumerated_int_id_field}
FROM
  unique_nodes
""" 
[docs]
NODE_FEATURES_ENUMERATION_QUERY = """
WITH
  unmapped_node_features AS
  (
    SELECT * FROM `{bq_node_features}`
  ),
  enumerated AS
  (
  SELECT
    {original_node_id_field},
    {enumerated_int_id_field}
  FROM
    `{bq_enumerated_node_ids}`
  ),
  mapped_node_features AS (
  SELECT
    enumerated.{enumerated_int_id_field} as {node_id_col},
    unmapped_node_features.* EXCEPT ({node_id_col})
  FROM
    enumerated
  INNER JOIN
    unmapped_node_features
  ON
    enumerated.{original_node_id_field} = unmapped_node_features.{node_id_col})
SELECT
  *
FROM
  mapped_node_features
""" 
[docs]
NO_EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY = """
WITH
  unmapped_graph AS
  (
    SELECT {src_node_id_col}, {dst_node_id_col} FROM `{bq_graph}`
  )
SELECT
  (
    SELECT {enumerated_int_id_field}
    FROM `{src_enumerated_node_ids}`
    WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col}
  ) as {src_node_id_col},
  (
    SELECT {enumerated_int_id_field}
    FROM `{dst_enumerated_node_ids}`
    WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col}
  ) as {dst_node_id_col},
FROM unmapped_graph
""" 
[docs]
EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY = """
WITH
  unmapped_graph AS
  (
    SELECT
      {src_node_id_col},
      {dst_node_id_col},
      * EXCEPT({src_node_id_col}, {dst_node_id_col})
    FROM
      `{bq_graph}`
  )
SELECT
  (
    SELECT {enumerated_int_id_field}
    FROM `{src_enumerated_node_ids}`
    WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col}
  ) as {src_node_id_col},
  (
    SELECT {enumerated_int_id_field}
    FROM `{dst_enumerated_node_ids}`
    WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col}
  ) as {dst_node_id_col},
  * EXCEPT({src_node_id_col}, {dst_node_id_col})
FROM unmapped_graph
"""