# Source code for flambe.nlp.transformers.model

# type: ignore[override]

from typing import Optional, Any

import torch
from transformers import AutoModel

from flambe.nn import Module

class PretrainedTransformerEmbedder(Module):
    """Embedder integration of the transformers library.

    Instantiate this object using any alias available in the
    `transformers` library. More information can be found here:
    https://huggingface.co/transformers/pretrained_models.html

    """

    def __init__(self,
                 alias: str,
                 cache_dir: Optional[str] = None,
                 padding_idx: Optional[int] = None,
                 pool: bool = False,
                 **kwargs) -> None:
        """Initialize from a pretrained model.

        Parameters
        ----------
        alias: str
            Alias of a pretrained model.
        cache_dir: str, optional
            A directory where to cache the downloaded vocabularies.
        padding_idx: int, optional
            The padding index used to compute the attention mask.
        pool: bool, optional
            Whether to return the pooled output or the full sequence
            encoding. Default ``False``.

        Raises
        ------
        ValueError
            If ``pool`` is requested for a GPT2 model, which does not
            provide a pooled output.

        """
        super().__init__()
        # GPT2 has no pooled output, so pooling cannot be honored.
        if 'gpt2' in alias and pool:
            raise ValueError('GPT2 does not support pooling.')
        embedder = AutoModel.from_pretrained(alias, cache_dir=cache_dir, **kwargs)
        # Expose the config so __getattr__ can proxy attribute lookups to it.
        self.config = embedder.config
        self._embedder = embedder
        self.padding_idx = padding_idx
        self.pool = pool

    def forward(self,
                data: torch.Tensor,
                token_type_ids: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.Tensor] = None,
                position_ids: Optional[torch.Tensor] = None,
                head_mask: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Perform a forward pass through the network.

        If pool was provided, will only return the pooled output of
        shape [B x H]. Otherwise, returns the full sequence encoding
        of shape [B x S x H].

        Parameters
        ----------
        data : torch.Tensor
            The input data of shape [B x S]
        token_type_ids : Optional[torch.Tensor], optional
            Segment token indices to indicate first and second portions
            of the inputs. Indices are selected in ``[0, 1]``: ``0``
            corresponds to a `sentence A` token, ``1`` corresponds to
            a `sentence B` token. Has shape [B x S]
        attention_mask : Optional[torch.Tensor], optional
            FloatTensor of shape [B x S]. Masked values should be 0
            for padding tokens, 1 otherwise. If not given and
            ``padding_idx`` was set, the mask is derived from ``data``.
        position_ids : Optional[torch.Tensor], optional
            Indices of positions of each input sequence tokens in the
            position embedding. Defaults to the order given in the
            input. Has shape [B x S].
        head_mask : Optional[torch.Tensor], optional
            Mask to nullify selected heads of the self-attention
            modules. Should be 0 for heads to mask, 1 otherwise.
            Has shape [num_layers x num_heads]

        Returns
        -------
        torch.Tensor
            If pool is True, returns a tensor of shape [B x H], else
            returns an encoding for each token in the sequence of
            shape [B x S x H].

        """
        # Derive the attention mask from the padding index when the
        # caller did not supply one explicitly.
        if attention_mask is None and self.padding_idx is not None:
            attention_mask = (data != self.padding_idx).float()

        outputs = self._embedder(data,
                                 token_type_ids=token_type_ids,
                                 attention_mask=attention_mask,
                                 position_ids=position_ids,
                                 head_mask=head_mask)
        # outputs[0] is the full sequence encoding; outputs[1] is the
        # pooled representation (when the underlying model provides one).
        output = outputs[0] if not self.pool else outputs[1]
        return output

    def __getattr__(self, name: str) -> Any:
        """Override getattr to inspect config.

        Falls back to the wrapped model's config when the attribute is
        not found on the module itself.

        Parameters
        ----------
        name : str
            The attribute to fetch

        Returns
        -------
        Any
            The attribute

        Raises
        ------
        AttributeError
            If the attribute exists on neither the module nor the
            config.

        """
        try:
            return super().__getattr__(name)
        except AttributeError as e:
            # Access via __dict__ to avoid re-triggering __getattr__
            # recursively while the lookup is in progress.
            config = self.__dict__['config']
            if hasattr(config, name):
                return getattr(config, name)
            else:
                raise e