
euroeval.metrics.base


"""The abstract base class for all metrics."""

import abc
import collections.abc as c
import logging
import typing as t

if t.TYPE_CHECKING:
    from datasets.arrow_dataset import Dataset

    from ..data_models import BenchmarkConfig, DatasetConfig

logger: logging.Logger = logging.getLogger("euroeval")


class Metric(abc.ABC):
    """Abstract base class for all metrics."""

    def __init__(
        self,
        name: str,
        pretty_name: str,
        postprocessing_fn: t.Callable[[float], tuple[float, str]] | None = None,
    ) -> None:
        """Initialise the metric.

        Args:
            name:
                The name of the metric in snake_case.
            pretty_name:
                The pretty name of the metric, used for display purposes.
            postprocessing_fn:
                A function to apply to the metric scores after they are computed,
                taking the score to the postprocessed score along with its string
                representation. Defaults to x -> (100 * x, f"{x:.2%}").
        """
        self.name = name
        self.pretty_name = pretty_name
        self.postprocessing_fn = (
            postprocessing_fn
            if postprocessing_fn is not None
            else lambda x: (100 * x, f"{x:.2%}")
        )

    def download(self, cache_dir: str) -> "Metric":
        """Initiates the download of the metric if needed.

        Args:
            cache_dir:
                The directory where the metric will be downloaded to.

        Returns:
            The metric object itself.
        """
        return self

    @abc.abstractmethod
    def __call__(
        self,
        predictions: c.Sequence,
        references: c.Sequence,
        dataset: "Dataset",
        dataset_config: "DatasetConfig",
        benchmark_config: "BenchmarkConfig",
    ) -> float | None:
        """Calculate the metric score.

        Args:
            predictions:
                The model predictions.
            references:
                The ground truth references.
            dataset:
                The dataset used for evaluation. This is only used in case any
                additional metadata is used to compute the metrics.
            dataset_config:
                The dataset configuration.
            benchmark_config:
                The benchmark configuration.

        Returns:
            The calculated metric score, or None if the score should be ignored.
        """
        ...

    def __hash__(self) -> int:
        """Return a hash of the metric configuration."""
        return hash(self.name)
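

A minimal sketch of a concrete subclass, shown for illustration only. The class name, the exact-match scoring logic, and the trailing usage lines are assumptions and are not part of the euroeval.metrics.base module itself; only the Metric base class above is.

class ExactMatchMetric(Metric):
    """Illustrative metric computing the fraction of exact matches."""

    def __call__(
        self,
        predictions: c.Sequence,
        references: c.Sequence,
        dataset: "Dataset",
        dataset_config: "DatasetConfig",
        benchmark_config: "BenchmarkConfig",
    ) -> float | None:
        """Calculate the proportion of predictions matching their references."""
        if not predictions:
            # Returning None signals that the score should be ignored.
            return None
        matches = sum(
            prediction == reference
            for prediction, reference in zip(predictions, references)
        )
        return matches / len(predictions)


# Example usage, relying on the default postprocessing function:
metric = ExactMatchMetric(name="exact_match", pretty_name="Exact match")
score, score_str = metric.postprocessing_fn(0.5)  # (50.0, "50.00%")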