# euroeval.types

# Module: euroeval.types
"""Types used throughout the project."""

import collections.abc as c
import typing as t

from transformers import PreTrainedTokenizer
from transformers.trainer_utils import EvalPrediction

try:
    from transformers.tokenization_mistral_common import MistralCommonTokenizer
except ImportError:
    # Fall back to the class exposed as `MistralCommonBackend`, aliased so the
    # rest of this module can keep referring to a single name.
    from transformers.tokenization_mistral_common import (
        MistralCommonBackend as MistralCommonTokenizer,
    )

if t.TYPE_CHECKING:
    # These names are only referenced inside quoted annotations below, so they
    # are not needed at runtime.
    from datasets.arrow_dataset import Dataset
    from numpy.typing import NDArray
    from pydantic import BaseModel

    from .data_models import BenchmarkConfig, GenerativeModelOutput


ScoreDict: t.TypeAlias = dict[str, dict[str, float] | c.Sequence[dict[str, float]]]
Predictions: t.TypeAlias = "NDArray | c.Sequence[str] | c.Sequence[c.Sequence[str]]"
Labels: t.TypeAlias = "NDArray | c.Sequence[str] | c.Sequence[c.Sequence[str]]"
Tokeniser: t.TypeAlias = PreTrainedTokenizer | MistralCommonTokenizer


class ComputeMetricsFunction(t.Protocol):
    """A function used to compute the metrics."""

    def __call__(
        self,
        model_outputs_and_labels: EvalPrediction
        | tuple[
            "NDArray | c.Sequence[str] | c.Sequence[c.Sequence[str]]",
            "NDArray | c.Sequence[str] | c.Sequence[c.Sequence[str]]",
        ],
        dataset: "Dataset",
        benchmark_config: "BenchmarkConfig",
    ) -> dict[str, float]:
        """Compute the metrics.

        Args:
            model_outputs_and_labels:
                The model outputs and labels.
            dataset:
                The dataset used for evaluation. This is only used in case any
                additional metadata is used to compute the metrics.
            benchmark_config:
                The benchmark configuration.

        Returns:
            The computed metrics.
        """
        ...


class ExtractLabelsFunction(t.Protocol):
    """A function used to extract the labels from the generated output."""

    def __call__(
        self, input_batch: dict[str, list], model_output: "GenerativeModelOutput"
    ) -> c.Sequence[str]:
        """Extract the labels from the generated output.

        Args:
            input_batch:
                The input batch.
            model_output:
                The model output.

        Returns:
            The extracted labels.
        """
        ...


class ScoringFunction(t.Protocol):
    """A function used to compute a score from a single model output."""

    def __call__(self, output: "BaseModel") -> float:
        """Compute a score from a model output.

        Args:
            output:
                A model output (Pydantic model) from the judge.

        Returns:
            A float score computed from the output.
        """
        ...


class BatchScoringFunction(t.Protocol):
    """A function used to compute batch scores from model outputs."""

    def __call__(
        self, outputs: list["BaseModel"], dataset: "Dataset | None" = None
    ) -> float:
        """Compute a batch score from model outputs.

        Args:
            outputs:
                List of model outputs (Pydantic models) from the judge.
            dataset:
                Optional dataset used for evaluation. Can be used for additional
                context when computing the score.

        Returns:
            A float score computed from the batch of outputs.
        """
        ...


def is_list_of_int(x: object) -> t.TypeGuard[c.Sequence[int]]:
    """Check if an object is a list of integers.

    Only `list` instances qualify; other sequence types such as tuples are
    rejected. An empty list is accepted.

    Args:
        x:
            The object to check.

    Returns:
        Whether the object is a list of integers.
    """
    return isinstance(x, list) and all(isinstance(i, int) for i in x)
def is_list_of_list_of_int(x:object)->t.TypeGuard[c.Sequence[c.Sequence[int]]]:    """Check if an object is a list of list of integers.    Args:        x:            The object to check.    Returns:        Whether the object is a list of list of integers.    """return(isinstance(x,list)andall(isinstance(i,list)foriinx)andall(isinstance(j,int)foriinxforjini))def is_list_of_str(x:object)->t.TypeGuard[c.Sequence[str]]:[docs]
    """Check if an object is a list of integers.    Args:        x:            The object to check.    Returns:        Whether the object is a list of strings.    """returnisinstance(x,list)andall(isinstance(i,str)foriinx)