Source code for lhotse.cut.text

from dataclasses import dataclass
from typing import Any, Dict, Optional, Union

import numpy as np

from lhotse.custom import CustomFieldMixin


[docs] @dataclass class TextExample(CustomFieldMixin): """ Represents a single text example. Useful e.g. for language modeling. """ text: str tokens: Optional[np.ndarray] = None custom: Optional[Dict[str, Any]] = None @property def num_tokens(self) -> Optional[int]: if self.tokens is None: return None return len(self.tokens)
[docs] @dataclass class TextPairExample(CustomFieldMixin): """ Represents a pair of text examples. Useful e.g. for sequence-to-sequence tasks. """ source: TextExample target: TextExample custom: Optional[Dict[str, Any]] = None @property def num_tokens(self) -> Optional[int]: return self.source.num_tokens