Source code for gigl.src.applied_tasks.test_tasks.academic
from torch_geometric.datasets.planetoid import Planetoid
from gigl.common.logger import Logger
from gigl.src.mocking.lib.pyg_datasets_forks import CoraFromGCS
[docs]
def log_stats_for_pyg_planetoid_dataset(dataset: Planetoid) -> None:
    """Log summary statistics for a Planetoid dataset and its first graph.

    Emits, through the module-level ``logger`` (defined outside this
    function), dataset-level information (number of graphs, features and
    classes) followed by statistics for ``dataset[0]``: node and edge counts,
    average node degree, sizes of the train/validation/test masks, the
    training node label rate, and three structural flags (isolated nodes,
    self-loops, undirectedness).

    Args:
        dataset: The dataset to describe. Only its first element
            (``dataset[0]``) is inspected for graph-level statistics.
    """
    logger.info(f"Dataset Info: {dataset}:")
    logger.info("======================")
    logger.info(f"Number of graphs: {len(dataset)}")
    logger.info(f"Number of features: {dataset.num_features}")
    logger.info(f"Number of classes: {dataset.num_classes}")
    logger.info("\n\n======================")

    data = dataset[0]
    logger.info(f"First graph info for dataset: {data}")
    logger.info("======================")

    # Gather some statistics about the graph.
    logger.info(f"Number of nodes: {data.num_nodes}")
    logger.info(f"Number of edges: {data.num_edges}")
    logger.info(f"Average node degree: {data.num_edges / data.num_nodes:.2f}")
    logger.info(f"Number of training nodes: {data.train_mask.sum()}")
    logger.info(f"Number of validation nodes: {data.val_mask.sum()}")
    logger.info(f"Number of testing nodes: {data.test_mask.sum()}")

    # The training label rate is the share of nodes whose label is available
    # for training, so only the training mask is counted. Adding the
    # validation and test masks would instead report the fraction of nodes
    # that belong to any split.
    num_train_nodes = int(data.train_mask.sum())
    logger.info(
        f"Training node label rate: {num_train_nodes / data.num_nodes:.2f}"
    )

    logger.info(f"Has isolated nodes: {data.has_isolated_nodes()}")
    logger.info(f"Has self-loops: {data.has_self_loops()}")
    logger.info(f"Is undirected: {data.is_undirected()}")
[docs]
def get_pyg_cora_dataset(store_at: str = "/tmp/Cora") -> Planetoid:
    """Build and return the Cora dataset via ``CoraFromGCS``.

    The returned object is subscriptable; the Cora graph itself is the first
    element, i.e. ``data = dataset[0]``. Train and test splits are exposed on
    that element through its ``train_mask`` and ``test_mask`` properties.

    Args:
        store_at: Value passed as ``root`` to ``CoraFromGCS``.

    Returns:
        torch_geometric.datasets.planetoid.Planetoid
    """
    return CoraFromGCS(root=store_at, name="Cora")