Source code for gigl.src.data_preprocessor.lib.ingest.reference
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType
from gigl.src.data_preprocessor.lib.types import InstanceDictPTransform
# Type hints for abstract dataclasses are currently not supported. https://github.com/python/mypy/issues/5374
# Abstract base for data references. Declared as a frozen dataclass, so assigning
# to a field after construction raises dataclasses.FrozenInstanceError.
# NOTE(review): the `[docs]` line in this block looks like documentation-viewer
# residue rather than Python source -- confirm against the real module.
@dataclass(frozen=True) # type: ignore
[docs]
class DataReference(ABC):
"""
Contains a URI string to the data reference, and provides a means of yielding
instance dicts via a beam PTransform.
A single DataReference is currently assumed to have data relevant to a *single* node or edge type.
A single DataReference *cannot* currently house mixed-type data.
"""
# NOTE(review): the docstring describes a URI string and a PTransform-yielding
# method, but neither a URI field nor a method definition is visible here; the
# decorator below has no `def` following it in this view. Presumably a
# `reference_uri` field and an abstract method (likely returning
# InstanceDictPTransform, which is imported above) were dropped -- TODO confirm.
@abstractmethod
# Frozen dataclass for references to node data; subclasses DataReference and
# also lists ABC as a base.
# NOTE(review): the `[docs]` lines in this block look like documentation-viewer
# residue rather than Python source -- confirm against the real module.
@dataclass(frozen=True) # type: ignore
[docs]
class NodeDataReference(DataReference, ABC):
"""
DataReference which stores node data.
"""
[docs]
# Optional string, None when not supplied. Presumably names the node-id
# column/field inside the referenced data -- verify against callers.
identifier: Optional[str] = None
# Defined explicitly, so @dataclass keeps this method instead of generating its
# own __repr__. Returns a one-line summary of node_type, identifier and
# reference_uri.
# NOTE(review): `node_type` and `reference_uri` are read here but are not
# declared in the visible lines; presumably `node_type` is a field of this class
# and `reference_uri` comes from DataReference -- TODO confirm.
def __repr__(self) -> str:
return f"NodeDataReference(node_type={self.node_type}, identifier={self.identifier}, reference_uri={self.reference_uri})"
# Frozen dataclass for references to edge data; subclasses DataReference and
# also lists ABC as a base.
# NOTE(review): the `[docs]` lines in this block look like documentation-viewer
# residue rather than Python source -- confirm against the real module.
@dataclass(frozen=True) # type: ignore
[docs]
class EdgeDataReference(DataReference, ABC):
"""
DataReference which stores edge data
"""
[docs]
# Usage category of the edges; defaults to EdgeUsageType.MAIN when not supplied.
edge_usage_type: EdgeUsageType = EdgeUsageType.MAIN
[docs]
# Optional string, None when not supplied. Presumably names the source-node-id
# column/field inside the referenced data -- verify against callers.
src_identifier: Optional[str] = None
[docs]
# Optional string, None when not supplied. Presumably names the
# destination-node-id column/field inside the referenced data -- verify against callers.
dst_identifier: Optional[str] = None
# Defined explicitly, so @dataclass keeps this method instead of generating its
# own __repr__. Returns a one-line summary of edge_type, src_identifier,
# dst_identifier and reference_uri; edge_usage_type is not part of the output.
# NOTE(review): `edge_type` and `reference_uri` are read here but are not
# declared in the visible lines; presumably `edge_type` is a field of this class
# and `reference_uri` comes from DataReference -- TODO confirm.
def __repr__(self) -> str:
return f"EdgeDataReference(edge_type={self.edge_type}, src_identifier={self.src_identifier}, dst_identifier={self.dst_identifier}, reference_uri={self.reference_uri})"