# Source code for gigl.src.applied_tasks.test_tasks.academic

from torch_geometric.datasets.planetoid import Planetoid

from gigl.common.logger import Logger
from gigl.src.mocking.lib.pyg_datasets_forks import CoraFromGCS

# Module-level logger used by the helper functions in this file.
logger = Logger()
def log_stats_for_pyg_planetoid_dataset(dataset: Planetoid):
    """Log summary statistics for a dataset and for its first graph.

    Everything is emitted through the module-level ``logger`` at INFO level:
    first the dataset-wide figures (number of graphs, features and classes),
    then the statistics of ``dataset[0]`` (node / edge counts, edges-per-node
    ratio, sizes of the split masks and a few structural flags).

    Args:
        dataset: Dataset to describe. It must support ``len()`` and indexing
            and expose ``num_features`` and ``num_classes``; its first element
            must expose ``train_mask``, ``val_mask`` and ``test_mask``.
    """
    divider = "======================"

    # Dataset-level overview.
    logger.info(f"Dataset Info: {dataset}:")
    logger.info(divider)
    logger.info(f"Number of graphs: {len(dataset)}")
    logger.info(f"Number of features: {dataset.num_features}")
    logger.info(f"Number of classes: {dataset.num_classes}")

    logger.info(f"\n\n{divider}")
    graph = dataset[0]
    logger.info(f"First graph info for dataset: {graph}")
    logger.info(divider)

    # Size of the first graph.
    logger.info(f"Number of nodes: {graph.num_nodes}")
    logger.info(f"Number of edges: {graph.num_edges}")
    edges_per_node = graph.num_edges / graph.num_nodes
    logger.info(f"Average node degree: {edges_per_node:.2f}")

    # Size of each split mask; each count is taken right before it is logged.
    train_count = graph.train_mask.sum()
    logger.info(f"Number of training nodes: {train_count}")
    val_count = graph.val_mask.sum()
    logger.info(f"Number of validation nodes: {val_count}")
    test_count = graph.test_mask.sum()
    logger.info(f"Number of testing nodes: {test_count}")

    # NOTE(review): the message says "Training node label rate", but the value
    # counts the train, test AND validation masks together. Confirm whether
    # the label or the computation reflects the intended metric.
    masked_total = int(train_count + test_count + val_count)
    masked_ratio = masked_total / graph.num_nodes
    logger.info(f"Training node label rate: {masked_ratio:.2f}")

    # Structural properties reported by the graph object itself.
    logger.info(f"Has isolated nodes: {graph.has_isolated_nodes()}")
    logger.info(f"Has self-loops: {graph.has_self_loops()}")
    logger.info(f"Is undirected: {graph.is_undirected()}")
def get_pyg_cora_dataset(store_at: str = "/tmp/Cora") -> Planetoid:
    """Build the Cora dataset through ``CoraFromGCS``.

    The returned object is subscriptable and the Cora graph is its first
    element, i.e. ``data = dataset[0]``. The train and test splits are
    available as the ``train_mask`` and ``test_mask`` properties of ``data``.

    Args:
        store_at: Forwarded as ``root`` to ``CoraFromGCS``. Presumably the
            local directory in which the dataset files are kept -- confirm
            against ``CoraFromGCS``.

    Returns:
        torch_geometric.datasets.planetoid.Planetoid: the dataset created by
        ``CoraFromGCS(root=store_at, name="Cora")``.
    """
    return CoraFromGCS(root=store_at, name="Cora")