Source code for gigl.src.common.models.layers.feature_interaction

from typing import Optional

import torch
import torch.nn as nn


class DCNCross(nn.Module):
    """
    Derived from the tensorflow_recommenders
    [implementation](https://www.tensorflow.org/recommenders/api_docs/python/tfrs/layers/dcn/Cross).

    Cross layer of Deep & Cross Network (DCN), which learns explicit,
    bounded-degree feature interactions efficiently.

    The `forward` method accepts two tensors. The first input `x0` is the base
    layer that contains the original features (usually the embedding layer);
    the second input `xi` is the output of the previous `DCNCross` layer in
    the stack, i.e. the i-th `DCNCross` layer. For the first `DCNCross` layer
    in the stack, `x0 = xi`.

    The output is

        x_{i+1} = x0 .* (W * xi + bias + diag_scale * xi) + xi,

    where .* denotes elementwise multiplication. W can be a full-rank matrix,
    or a low-rank factorization U * V that reduces the computational cost, and
    `diag_scale` increases the diagonal of W to improve training stability
    (especially for the low-rank case).

    References:
        - [R. Wang et al.](https://arxiv.org/pdf/2008.13535.pdf)
          See Eq. (1) for the full-rank and Eq. (2) for the low-rank version.
        - [R. Wang et al.](https://arxiv.org/pdf/1708.05123.pdf)

    Args:
        in_dim (int): The input feature dimension.
        projection_dim (Optional[int]): Projection dimension to reduce the
            computational cost. Defaults to `None`, in which case a full
            (`in_dim` by `in_dim`) matrix W is used. If set, a low-rank matrix
            W = U * V is used, where U is of size `in_dim` by `projection_dim`
            and V is of size `projection_dim` by `in_dim`. `projection_dim`
            needs to be smaller than `in_dim / 2` to improve model efficiency.
            In practice, we've observed that `projection_dim = in_dim / 4`
            consistently preserves the accuracy of the full-rank version.
        diag_scale (float): A non-negative float that increases the diagonal
            of the kernel W by `diag_scale`, i.e. W + diag_scale * I, where I
            is the identity matrix.
        use_bias (bool): Whether to add a bias term to this layer. If set to
            False, no bias term is used.

    Input shape:
        Two (batch_size, `in_dim`) dimensional inputs.
    Output shape:
        A single (batch_size, `in_dim`) dimensional output.
    """

    def __init__(
        self,
        in_dim: int,
        projection_dim: Optional[int] = None,
        diag_scale: float = 0.0,
        use_bias: bool = True,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self._in_dim = in_dim
        self._projection_dim = projection_dim
        self._diag_scale = diag_scale
        self._use_bias = use_bias

        if self._diag_scale < 0.0:
            raise ValueError(
                f"`diag_scale` should be non-negative. Got `diag_scale` = {diag_scale}"
            )

        if self._projection_dim is None:
            self._lin = nn.Linear(self._in_dim, self._in_dim, bias=self._use_bias)
        else:
            self._lin_u = nn.Linear(
                self._in_dim, self._projection_dim, bias=self._use_bias
            )
            self._lin_v = nn.Linear(
                self._projection_dim, self._in_dim, bias=self._use_bias
            )

    def forward(
        self, x0: torch.Tensor, x: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Computes the feature cross.

        Args:
            x0: The input tensor.
            x: Optional second input tensor. If provided, the layer computes
                crosses between x0 and x; if not provided, the layer computes
                crosses between x0 and itself.

        Returns:
            Tensor of crosses.
        """
        if x is None:
            x = x0

        if x0.shape[-1] != x.shape[-1]:
            raise ValueError(
                f"`x0` and `x` dimension mismatch! Got `x0` dimension {x0.shape[-1]}, "
                f"and `x` dimension {x.shape[-1]}. This case is not supported yet."
            )

        if self._projection_dim is None:
            prod_output = self._lin(x)
        else:
            prod_output = self._lin_v(self._lin_u(x))

        if self._diag_scale:
            prod_output += self._diag_scale * x

        return x0 * prod_output + x

    def reset_parameters(self):
        if self._projection_dim is None:
            self._lin.reset_parameters()
        else:
            self._lin_u.reset_parameters()
            self._lin_v.reset_parameters()

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(in_dim={self._in_dim}, "
            f"projection_dim={self._projection_dim}, "
            f"diag_scale={self._diag_scale}, use_bias={self._use_bias})"
        )
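
# --- Usage sketch (not part of the original module). A minimal, hypothetical
# example of `DCNCross`, assuming only the class above and PyTorch. For a
# full-rank layer with `diag_scale=0`, the output should match the documented
# formula x1 = x0 .* (W x0 + b) + x0; the check below reads the private
# `_lin` submodule purely for verification.
torch.manual_seed(0)
batch_size, in_dim = 4, 8
x0 = torch.randn(batch_size, in_dim)

# Full-rank cross layer: the first layer in a stack crosses x0 with itself.
cross = DCNCross(in_dim=in_dim)
x1 = cross(x0)  # shape: (batch_size, in_dim)
assert torch.allclose(x1, x0 * cross._lin(x0) + x0)

# Low-rank variant: W = U * V with projection_dim < in_dim / 2, plus a small
# diagonal boost for training stability.
low_rank = DCNCross(in_dim=in_dim, projection_dim=2, diag_scale=0.1)
assert low_rank(x0).shape == (batch_size, in_dim)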


class DCNv2(nn.Module):
    """
    Wraps `DCNCross` for multi-layer feature crossing. See the documentation
    of `DCNCross` for more details.

    Args:
        in_dim (int): The input feature dimension.
        num_layers (int): How many feature-crossing layers to use. K layers
            can produce up to (K+1)-order features.
        projection_dim (Optional[int]): Projection dimension to reduce the
            computational cost. Defaults to `None`, in which case a full
            (`in_dim` by `in_dim`) matrix W is used. If set, a low-rank matrix
            W = U * V is used, where U is of size `in_dim` by `projection_dim`
            and V is of size `projection_dim` by `in_dim`. `projection_dim`
            needs to be smaller than `in_dim / 2` to improve model efficiency.
            In practice, we've observed that `projection_dim = in_dim / 4`
            consistently preserves the accuracy of the full-rank version.
        diag_scale (float): A non-negative float that increases the diagonal
            of the kernel W by `diag_scale`, i.e. W + diag_scale * I, where I
            is the identity matrix.
        use_bias (bool): Whether to add a bias term to this layer. If set to
            False, no bias term is used.
    """

    def __init__(
        self,
        in_dim: int,
        num_layers: int = 1,
        projection_dim: Optional[int] = None,
        diag_scale: float = 0.0,
        use_bias: bool = True,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self._in_dim = in_dim
        self._num_layers = num_layers
        self._projection_dim = projection_dim
        self._diag_scale = diag_scale
        self._use_bias = use_bias
        self._layers = nn.ModuleList()
        for _ in range(num_layers):
            self._layers.append(
                DCNCross(
                    self._in_dim,
                    projection_dim=self._projection_dim,
                    diag_scale=self._diag_scale,
                    use_bias=self._use_bias,
                    **kwargs,
                )
            )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x0 holds the base features; xl is the output of the previous cross
        # layer (initially x itself).
        x0, xl = x, x
        for layer in self._layers:
            xl = layer(x0, xl)
        return xl

    def reset_parameters(self):
        for layer in self._layers:
            layer.reset_parameters()

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}(in_dim={self._in_dim}, "
            f"num_layers={self._num_layers}, "
            f"projection_dim={self._projection_dim}, "
            f"diag_scale={self._diag_scale}, use_bias={self._use_bias})"
        )
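
# --- Usage sketch (not part of the original module). A minimal, hypothetical
# example of `DCNv2`: stacking K cross layers yields up to (K+1)-order
# feature interactions; here K = 2 with a low-rank projection.
dcn = DCNv2(in_dim=16, num_layers=2, projection_dim=4)
features = torch.randn(32, 16)  # e.g. a batch of concatenated embeddings
crossed = dcn(features)
assert crossed.shape == (32, 16)  # the feature dimension is preserved

# Re-initialize all stacked cross layers, e.g. between training runs.
dcn.reset_parameters()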