Source code for gigl.src.data_preprocessor.lib.ingest.reference
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType
from gigl.src.data_preprocessor.lib.types import InstanceDictPTransform
# Type hints for abstract dataclasses are currently not supported. https://github.com/python/mypy/issues/5374
@dataclass(frozen=True)  # type: ignore
# NOTE(review): the `[docs]` line below looks like documentation-viewer residue rather
# than Python source (it is not valid between a decorator and `class`) -- confirm
# against the original module and remove.
[docs]
class DataReference(ABC):
    """
    Abstract base class for references to data.

    Contains a URI string to the data reference, and provides a means of yielding
    instance dicts via a beam PTransform.

    A single DataReference is currently assumed to have data relevant to a *single*
    node or edge type. A single DataReference *cannot* currently house mixed-type data.
    """
    # NOTE(review): no field declarations are visible in this class as shown. The
    # subclasses' __repr__ methods read `self.reference_uri`, so the URI field described
    # in the docstring is presumably declared here -- confirm against the original module.
    # NOTE(review): the definition that the decorator below applies to is missing from
    # this view; presumably it is the abstract method that yields the beam PTransform
    # mentioned in the docstring -- confirm.
    @abstractmethod
 
@dataclass(frozen=True)  # type: ignore
class NodeDataReference(DataReference, ABC):
    """
    DataReference which stores node data.

    Attributes:
        identifier: Optional identifier string; defaults to ``None``. See the TODO
            on the field regarding how it is currently overridden.
    """

    # NOTE(review): __repr__ below reads `self.node_type` and `self.reference_uri`,
    # neither of which is declared in this class as shown. Presumably `node_type` is
    # declared here and `reference_uri` on DataReference in the original module --
    # confirm before relying on this definition.

    # TODO (mkolodner-sc): Currently this field is always overridden by the identifier from the corresponding node data preprocessing spec.
    # Investigate whether we can remove this field from the user API.
    identifier: Optional[str] = None

    def __repr__(self) -> str:
        """Return a string showing the node type, identifier and reference URI."""
        # Adjacent f-strings are concatenated at compile time, so the output is
        # identical to a single long literal.
        return (
            f"NodeDataReference(node_type={self.node_type}, "
            f"identifier={self.identifier}, "
            f"reference_uri={self.reference_uri})"
        )
@dataclass(frozen=True)  # type: ignore
class EdgeDataReference(DataReference, ABC):
    """
    DataReference which stores edge data.

    Attributes:
        edge_usage_type: The EdgeUsageType of this edge data; defaults to
            ``EdgeUsageType.MAIN``.
        src_identifier: Optional source identifier string; defaults to ``None``.
        dst_identifier: Optional destination identifier string; defaults to ``None``.
            See the TODO on these two fields regarding how they are currently
            overridden.
    """

    # NOTE(review): __repr__ below reads `self.edge_type` and `self.reference_uri`,
    # neither of which is declared in this class as shown. Presumably `edge_type` is
    # declared here and `reference_uri` on DataReference in the original module --
    # confirm before relying on this definition.

    edge_usage_type: EdgeUsageType = EdgeUsageType.MAIN

    # TODO (mkolodner-sc): Currently these fields are always overridden by the src and dst identifiers from the corresponding edge data preprocessing spec.
    # Investigate whether we can remove these fields from the user API.
    src_identifier: Optional[str] = None
    dst_identifier: Optional[str] = None

    def __repr__(self) -> str:
        """Return a string showing the edge type, src/dst identifiers and reference URI.

        Note that ``edge_usage_type`` is not included in the output.
        """
        # Adjacent f-strings are concatenated at compile time, so the output is
        # identical to a single long literal.
        return (
            f"EdgeDataReference(edge_type={self.edge_type}, "
            f"src_identifier={self.src_identifier}, "
            f"dst_identifier={self.dst_identifier}, "
            f"reference_uri={self.reference_uri})"
        )