# Source code for flambe.tokenizer.tokenizer

from abc import abstractmethod
from typing import List

from flambe import Component


class Tokenizer(Component):
    """Base interface to a Tokenizer object.

    A tokenizer turns a single string into a list of string tokens.
    Concrete tokenizers provide that behavior by overriding the
    `tokenize` method; instances are also directly callable, which
    simply forwards to `tokenize`.

    """

    # NOTE(review): `abstractmethod` only blocks instantiation when the
    # class hierarchy uses an ABC-aware metaclass; whether `Component`
    # provides one is not visible from this module -- confirm.
    @abstractmethod
    def tokenize(self, example: str) -> List[str]:
        """Tokenize an input example.

        Parameters
        ----------
        example : str
            The input example, as a string

        Returns
        -------
        List[str]
            The output tokens, as a list of strings

        """

    def __call__(self, example: str) -> List[str]:
        """Make a tokenizer callable.

        Calling the instance is equivalent to calling `tokenize`
        with the same argument.

        Parameters
        ----------
        example : str
            The input example, as a string

        Returns
        -------
        List[str]
            The tokens produced by `tokenize` for this example

        """
        return self.tokenize(example)