Source code for gigl.src.data_preprocessor.lib.enumerate.queries

# Default name for the column that carries a node's original (pre-enumeration) id.
# Presumably the default value for the `{original_node_id_field}` placeholder used
# by the query templates in this module -- confirm against callers.
DEFAULT_ORIGINAL_NODE_ID_FIELD = "node_id"
# Default name for the column that carries the integer id assigned to a node.
# Presumably the default value for the `{enumerated_int_id_field}` placeholder used
# by the query templates in this module -- confirm against callers.
DEFAULT_ENUMERATED_NODE_ID_FIELD = "int_id"
# SQL template (BigQuery dialect, judging by the backtick-quoted table name and the
# `bq_` placeholder names) that assigns an integer id to every distinct node id.
#
# Placeholders (brace-delimited; presumably filled with str.format -- confirm
# against the caller):
#   bq_source_table_name             - table the node ids are read from.
#   bq_source_table_node_id_col_name - column of that table holding the node id.
#   original_node_id_field           - output column name for the original node id.
#   enumerated_int_id_field          - output column name for the assigned integer id.
#
# The integer id is ROW_NUMBER() over the distinct ids, ordered by the original id,
# minus 1 -- so ids are 0-based and contiguous.
UNIQUE_NODE_ENUMERATION_QUERY = """
WITH unique_nodes AS (
    SELECT DISTINCT {bq_source_table_node_id_col_name} as {original_node_id_field}
    FROM `{bq_source_table_name}`
)
SELECT
    {original_node_id_field},
    ROW_NUMBER() OVER(ORDER BY {original_node_id_field}) - 1 AS {enumerated_int_id_field}
FROM unique_nodes
"""
# SQL template (BigQuery dialect: backtick-quoted tables, `* EXCEPT`) that rewrites a
# node-features table so its id column holds enumerated integer ids.
#
# Placeholders (brace-delimited; presumably filled with str.format -- confirm
# against the caller):
#   bq_node_features        - table with one id column plus feature columns.
#   bq_enumerated_node_ids  - table mapping original ids to integer ids.
#   original_node_id_field  - column of the mapping table holding the original id.
#   enumerated_int_id_field - column of the mapping table holding the integer id.
#   node_id_col             - id column of the features table; the output keeps this
#                             name but carries the integer id instead.
#
# The mapping is applied with an INNER JOIN, so feature rows whose id has no entry
# in the mapping table do not appear in the result.
NODE_FEATURES_ENUMERATION_QUERY = """
WITH unmapped_node_features AS (
    SELECT *
    FROM `{bq_node_features}`
),
enumerated AS (
    SELECT
        {original_node_id_field},
        {enumerated_int_id_field}
    FROM `{bq_enumerated_node_ids}`
),
mapped_node_features AS (
    SELECT
        enumerated.{enumerated_int_id_field} as {node_id_col},
        unmapped_node_features.* EXCEPT ({node_id_col})
    FROM enumerated
    INNER JOIN unmapped_node_features
        ON enumerated.{original_node_id_field} = unmapped_node_features.{node_id_col}
)
SELECT *
FROM mapped_node_features
"""
# SQL template (BigQuery dialect, judging by the backtick-quoted tables) that rewrites
# an edge list without edge features so both endpoints hold enumerated integer ids.
#
# Placeholders (brace-delimited; presumably filled with str.format -- confirm
# against the caller):
#   bq_graph                - edge table to read.
#   src_node_id_col         - source-endpoint column; the output keeps this name.
#   dst_node_id_col         - destination-endpoint column; the output keeps this name.
#   src_enumerated_node_ids - mapping table used to translate source ids.
#   dst_enumerated_node_ids - mapping table used to translate destination ids.
#   original_node_id_field  - column of the mapping tables holding the original id.
#   enumerated_int_id_field - column of the mapping tables holding the integer id.
#
# Each endpoint is translated by a correlated scalar subquery, so every input edge
# yields exactly one output row; an endpoint with no entry in its mapping table
# comes out as NULL instead of the edge being dropped.
# NOTE(review): a scalar subquery that matches more than one mapping row fails at
# run time, so original ids are assumed unique in the mapping tables -- confirm.
NO_EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY = """
WITH unmapped_graph AS (
    SELECT
        {src_node_id_col},
        {dst_node_id_col}
    FROM `{bq_graph}`
)
SELECT
    (
        SELECT {enumerated_int_id_field}
        FROM `{src_enumerated_node_ids}`
        WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col}
    ) as {src_node_id_col},
    (
        SELECT {enumerated_int_id_field}
        FROM `{dst_enumerated_node_ids}`
        WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col}
    ) as {dst_node_id_col}
FROM unmapped_graph
"""
# SQL template (BigQuery dialect: backtick-quoted tables, `* EXCEPT`) that rewrites an
# edge list with edge features so both endpoints hold enumerated integer ids while
# every other column is passed through unchanged.
#
# Placeholders (brace-delimited; presumably filled with str.format -- confirm
# against the caller):
#   bq_graph                - edge table to read (endpoints plus feature columns).
#   src_node_id_col         - source-endpoint column; the output keeps this name.
#   dst_node_id_col         - destination-endpoint column; the output keeps this name.
#   src_enumerated_node_ids - mapping table used to translate source ids.
#   dst_enumerated_node_ids - mapping table used to translate destination ids.
#   original_node_id_field  - column of the mapping tables holding the original id.
#   enumerated_int_id_field - column of the mapping tables holding the integer id.
#
# The CTE lists the two endpoint columns first and then `* EXCEPT(...)`, which only
# reorders the columns of the edge table. Each endpoint is then translated by a
# correlated scalar subquery, so every input edge yields exactly one output row; an
# endpoint with no entry in its mapping table comes out as NULL instead of the edge
# being dropped.
# NOTE(review): a scalar subquery that matches more than one mapping row fails at
# run time, so original ids are assumed unique in the mapping tables -- confirm.
EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY = """
WITH unmapped_graph AS (
    SELECT
        {src_node_id_col},
        {dst_node_id_col},
        * EXCEPT({src_node_id_col}, {dst_node_id_col})
    FROM `{bq_graph}`
)
SELECT
    (
        SELECT {enumerated_int_id_field}
        FROM `{src_enumerated_node_ids}`
        WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col}
    ) as {src_node_id_col},
    (
        SELECT {enumerated_int_id_field}
        FROM `{dst_enumerated_node_ids}`
        WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col}
    ) as {dst_node_id_col},
    * EXCEPT({src_node_id_col}, {dst_node_id_col})
FROM unmapped_graph
"""