Source code for flambe.nn.embedder


from typing import Tuple, Union, Optional

from torch import nn
from torch import Tensor

from flambe.compile import registrable_factory
from flambe.nn.module import Module


class Embeddings(Module, nn.Embedding):
    """Implement an Embeddings module.

    This object replicates the usage of nn.Embedding but registers
    the from_pretrained classmethod to be used inside a Flambé
    configuration, as this does not happen automatically during the
    registration of PyTorch objects.

    """

    @registrable_factory
    @classmethod
    def from_pretrained(cls,
                        embeddings: Tensor,
                        freeze: bool = True,
                        padding_idx: Optional[int] = None,
                        max_norm: Optional[float] = None,
                        norm_type: float = 2.0,
                        scale_grad_by_freq: bool = False,
                        sparse: bool = False):
        """Create an Embeddings instance from a given 2-dimensional Tensor.

        Parameters
        ----------
        embeddings: torch.Tensor
            FloatTensor containing weights for the Embedding. The
            first dimension is passed to Embedding as num_embeddings,
            the second as embedding_dim.
        freeze: bool
            If True, the tensor does not get updated in the learning
            process. Default: True
        padding_idx: int, optional
            See module initialization documentation.
        max_norm: float, optional
            See module initialization documentation.
        norm_type: float, optional
            See module initialization documentation. Default 2.
        scale_grad_by_freq: bool, optional
            See module initialization documentation. Default False.
        sparse: bool, optional
            See module initialization documentation. Default False.

        """
        return super().from_pretrained(embeddings,
                                       freeze,
                                       padding_idx,
                                       max_norm,
                                       norm_type,
                                       scale_grad_by_freq,
                                       sparse)
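
# Usage sketch (not part of the original module): the weight matrix
# below is a hypothetical stand-in for real pretrained vectors, such
# as loaded GloVe embeddings. In a Flambé YAML config, the
# registrable_factory decorator makes this factory referenceable via
# a tag of the form !Embeddings.from_pretrained.
#
#     import torch
#
#     weights = torch.randn(100, 50)  # 100 tokens, 50-dim vectors
#     embeddings = Embeddings.from_pretrained(weights,
#                                             freeze=True,
#                                             padding_idx=0)
#     indices = torch.tensor([[1, 2, 0, 0]])  # one padded sequence
#     vectors = embeddings(indices)           # shape: (1, 4, 50)
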
class Embedder(Module):
    """Implement an Embedder module.

    An Embedder takes as input a sequence of index tokens, and
    computes the corresponding embedded representations and padding
    mask. The encoder may be initialized using a pretrained
    embedding matrix.

    Attributes
    ----------
    embedding: nn.Embedding
        The embedding layer
    encoder: Module
        The sub-encoder that this object is wrapping
    dropout: nn.Dropout
        The dropout layer

    """

    def __init__(self,
                 embedding: nn.Embedding,
                 encoder: Module,
                 embedding_dropout: float = 0,
                 pad_index: Optional[int] = 0) -> None:
        """Initialize the Embedder module.

        Parameters
        ----------
        embedding: nn.Embedding
            The embedding layer
        encoder: Module
            The encoder
        embedding_dropout: float, optional
            Amount of dropout between the embeddings and the encoder
        pad_index: int, optional
            Index of the padding token, used to build the padding
            mask passed to the encoder. If None, no mask is computed.
            Default: 0

        """
        super().__init__()

        self.embedding = embedding
        self.dropout = nn.Dropout(embedding_dropout)
        self.encoder = encoder
        self.pad_index = pad_index
    def forward(self, data: Tensor) -> Union[Tensor, Tuple[Tensor, Tensor]]:
        """Perform a forward pass through the network.

        Parameters
        ----------
        data: torch.Tensor
            The input data, as a long tensor of token indices,
            batch first

        Returns
        -------
        Union[Tensor, Tuple[Tensor, Tensor]]
            The encoded output, as a float tensor. May also return
            a state if the encoder is an RNN

        """
        embedded = self.embedding(data)
        embedded = self.dropout(embedded)

        if self.pad_index is not None:
            # Mask is 1 for real tokens and 0 for padding positions
            mask = (data != self.pad_index).float()
            encoding = self.encoder(embedded, mask=mask)
        else:
            encoding = self.encoder(embedded)

        return encoding
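
# Usage sketch (not part of the original module): SomeEncoder is a
# hypothetical flambe Module whose forward accepts an optional ``mask``
# keyword, matching the call in Embedder.forward above.
#
#     import torch
#
#     embedding = nn.Embedding(100, 50, padding_idx=0)
#     encoder = SomeEncoder(input_size=50)  # hypothetical encoder
#     embedder = Embedder(embedding, encoder,
#                         embedding_dropout=0.1,
#                         pad_index=0)
#     data = torch.tensor([[1, 2, 0, 0]])  # batch-first token indices
#     output = embedder(data)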