Skip to content

euroeval

[docs] package euroeval

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""EuroEval - A benchmarking framework for language models."""

# STAGE 1 ###
# Block unwanted terminal output that happens on importing external modules ###

import importlib.util
import logging
import os
import sys
import warnings

from termcolor import colored

# Block specific warnings before importing anything else, as they can be noisy
if os.getenv("FULL_LOG") != "1":
    warnings.filterwarnings("ignore", category=UserWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)
    logging.getLogger("httpx").setLevel(logging.CRITICAL)
    logging.getLogger("datasets").setLevel(logging.CRITICAL)
    logging.getLogger("vllm").setLevel(logging.CRITICAL)
    os.environ["VLLM_CONFIGURE_LOGGING"] = "0"

# Set up logging
# fmt = colored("%(asctime)s", "light_blue") + " ⋅ " + colored("%(message)s", "green")
fmt = colored("%(message)s", "light_yellow")
logging.basicConfig(
    level=logging.CRITICAL if hasattr(sys, "_called_from_test") else logging.INFO,
    format=fmt,
    datefmt="%Y-%m-%d %H:%M:%S",
)


# STAGE 2 ###
# Check for incompatible packages ###

# Throw informative error if `flash_attn` is installed on non-ROCm PyTorch builds.
# On ROCm builds the package does not cause the same conflicts, so we skip this check
# to allow evaluation to proceed (vLLM will fall back to TRITON_ATTN there).
if importlib.util.find_spec("flash_attn") is not None:
    try:
        import torch as _torch

        # `torch.version.hip` is non-None only on ROCm (AMD GPU) builds of PyTorch
        _is_rocm = _torch.version.hip is not None
    except (ImportError, AttributeError):
        # Torch is missing or lacks version metadata; assume a non-ROCm build so
        # the incompatibility error below is still raised
        _is_rocm = False
    if not _is_rocm:
        # Abort the import entirely - evaluation would fail later anyway
        logging.critical(
            "The `flash_attn` package is not supported by EuroEval, as it is now "
            "built into the other packages and it conflicts with the other "
            "implementations. Please uninstall it using `pip uninstall flash_attn` "
            "and try again."
        )
        sys.exit(1)


# STAGE 3 ###
# Set the rest up ###

import importlib.metadata  # noqa: E402

from dotenv import load_dotenv  # noqa: E402

# These imports are deliberately delayed until after stages 1-2 (hence the E402
# suppressions): they pull in the heavy external modules whose warnings and
# logging were silenced above
from .benchmarker import Benchmarker  # noqa: E402
from .data_models import DatasetConfig  # noqa: E402
from .logging_utils import block_terminal_output  # noqa: E402
from .tasks import (  # noqa: E402
    MULTIPLE_CHOICE,
    TEXT_CLASSIFICATION,
    TOKEN_CLASSIFICATION,
)

# Block unwanted terminal outputs. This blocks way more than the above, but since it
# relies on importing from the `utils` module, external modules are already imported
# before this is run, necessitating the above block as well
block_terminal_output()


# Fetches the version of the package as defined in pyproject.toml
__version__: str = importlib.metadata.version("euroeval")


# Loads environment variables from a local `.env` file, if one is present
load_dotenv()


# Disable parallelisation when tokenizing, as that can lead to errors
os.environ["TOKENIZERS_PARALLELISM"] = "false"


# Enable MPS fallback, so ops unsupported on Apple's MPS backend run on CPU
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"


# Set amount of threads per GPU - this is the default and is only set to prevent a
# warning from showing
os.environ["OMP_NUM_THREADS"] = "1"


# Avoid the "Cannot re-initialize CUDA in forked subprocess" error - see
# https://github.com/vllm-project/vllm/issues/6152 for more
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"


# Allow long max model length in vLLM. This happens when vLLM registers that the model
# has a shorter context length than the value we are inserting. But since we do a
# thorough check of the model's config before setting the context length, we trust our
# own checks and ignore the internal vLLM check.
os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1"


# Avoid the "Unclosed client session" error when evaluating Ollama models with LiteLLM.
# The error comes from the `aiohttp` package, and this environment variable forces the
# use of `httpx` instead.
# Link: https://github.com/BerriAI/litellm/issues/11657#issuecomment-3038984975
os.environ["DISABLE_AIOHTTP_TRANSPORT"] = "True"


# Set the HF_TOKEN env var to copy the HUGGINGFACE_API_KEY env var, as vLLM uses the
# former and LiteLLM uses the latter
if os.getenv("HUGGINGFACE_API_KEY"):
    os.environ["HF_TOKEN"] = os.environ["HUGGINGFACE_API_KEY"]