Skip to content

euroeval.dataset_configs.icelandic

docs module euroeval.dataset_configs.icelandic

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
"""All Icelandic dataset configurations used in EuroEval."""

from ..data_models import DatasetConfig
from ..languages import IS
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM

### Official datasets ###

# Sentiment classification (SENT) configuration for the Hotter and Colder dataset.
HOTTER_AND_COLDER_SENTIMENT_CONFIG = DatasetConfig(
    name="hotter-and-colder-sentiment",
    huggingface_id="EuroEval/hotter-and-colder-sentiment",
    task=SENT,
    languages=[IS],
    pretty_name=(
        "the sentiment classification part of the Icelandic dataset "
        "Hotter and Colder"
    ),
)

# Linguistic acceptability (LA) configuration for the Icelandic part of ScaLA.
SCALA_IS_CONFIG = DatasetConfig(
    name="scala-is",
    huggingface_id="EuroEval/scala-is",
    task=LA,
    languages=[IS],
    pretty_name=(
        "the Icelandic part of the linguistic acceptability dataset ScaLA"
    ),
)

# Named entity recognition (NER) configuration for MIM-GOLD-NER. The Hugging
# Face ID points at the "-mini" repository, matching the "truncated version"
# wording in the pretty name.
MIM_GOLD_NER_CONFIG = DatasetConfig(
    name="mim-gold-ner",
    huggingface_id="EuroEval/mim-gold-ner-mini",
    task=NER,
    languages=[IS],
    pretty_name=(
        "the truncated version of the Icelandic named entity recognition dataset "
        "MIM-GOLD-NER"
    ),
)

# Reading comprehension (RC) configuration for Natural Questions in Icelandic.
# The Hugging Face ID points at the "-mini" repository, matching the
# "truncated version" wording in the pretty name.
NQII_CONFIG = DatasetConfig(
    name="nqii",
    huggingface_id="EuroEval/nqii-mini",
    task=RC,
    languages=[IS],
    pretty_name=(
        "the truncated version of the Icelandic reading comprehension "
        "dataset Natural Questions in Icelandic"
    ),
)

# Summarisation (SUMM) configuration for RÚV Radio News. The Hugging Face ID
# points at the "-mini" repository, matching the "truncated version" wording
# in the pretty name.
RRN_CONFIG = DatasetConfig(
    name="rrn",
    huggingface_id="EuroEval/rrn-mini",
    task=SUMM,
    languages=[IS],
    pretty_name=(
        "the truncated version of the Icelandic summarisation "
        "dataset RÚV Radio News"
    ),
)

# Knowledge (KNOW) configuration for IcelandicKnowledge, which the pretty name
# describes as derived from the IcelandicQA dataset.
ICELANDIC_KNOWLEDGE_CONFIG = DatasetConfig(
    name="icelandic-knowledge",
    huggingface_id="EuroEval/icelandic-knowledge",
    task=KNOW,
    languages=[IS],
    pretty_name=(
        "the Icelandic knowledge dataset IcelandicKnowledge, "
        "derived from the IcelandicQA dataset"
    ),
)

# Common-sense reasoning (COMMON_SENSE) configuration for Winogrande-is.
WINOGRANDE_IS_CONFIG = DatasetConfig(
    name="winogrande-is",
    huggingface_id="EuroEval/winogrande-is",
    task=COMMON_SENSE,
    languages=[IS],
    pretty_name=(
        "the Icelandic common-sense reasoning dataset Winogrande-is, "
        "manually translated from the English Winogrande dataset"
    ),
)


### Unofficial datasets ###

# Unofficial linguistic acceptability (LA) configuration for the truncated
# Icelandic Error Corpus; the untruncated corpus has its own "ice-ec-full" entry.
ICE_EC_CONFIG = DatasetConfig(
    name="ice-ec",
    huggingface_id="EuroEval/ice-ec",
    task=LA,
    languages=[IS],
    unofficial=True,
    pretty_name="the truncated version of the Icelandic Error Corpus",
)

# Unofficial linguistic acceptability (LA) configuration for the Icelandic
# Error Corpus, published under the "ice-ec-full" Hugging Face ID.
ICE_EC_FULL_CONFIG = DatasetConfig(
    name="ice-ec-full",
    huggingface_id="EuroEval/ice-ec-full",
    task=LA,
    languages=[IS],
    unofficial=True,
    pretty_name="the Icelandic Error Corpus",
)

# Unofficial linguistic acceptability (LA) configuration for IceLinguistic.
ICE_LINGUISTIC_CONFIG = DatasetConfig(
    name="ice-linguistic",
    huggingface_id="EuroEval/ice-linguistic",
    task=LA,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the Icelandic linguistic acceptability dataset IceLinguistic"
    ),
)

# Unofficial reading comprehension (RC) configuration for IcelandicQA.
ICELANDIC_QA_CONFIG = DatasetConfig(
    name="icelandic-qa",
    huggingface_id="EuroEval/icelandic-qa",
    task=RC,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the Icelandic reading comprehension dataset IcelandicQA"
    ),
)

# Unofficial knowledge (KNOW) configuration for MMLU-is. The Hugging Face ID
# points at the "-mini" repository, matching the "truncated version" wording
# in the pretty name.
MMLU_IS_CONFIG = DatasetConfig(
    name="mmlu-is",
    huggingface_id="EuroEval/mmlu-is-mini",
    task=KNOW,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the truncated version of the Icelandic knowledge dataset "
        "MMLU-is, translated from the English MMLU dataset"
    ),
)

# Unofficial knowledge (KNOW) configuration for ARC-is. The Hugging Face ID
# points at the "-mini" repository, matching the "truncated version" wording
# in the pretty name.
ARC_IS_CONFIG = DatasetConfig(
    name="arc-is",
    huggingface_id="EuroEval/arc-is-mini",
    task=KNOW,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the truncated version of the Icelandic knowledge dataset "
        "ARC-is, translated from the English ARC dataset"
    ),
)

# Unofficial common-sense reasoning (COMMON_SENSE) configuration for
# HellaSwag-is. The Hugging Face ID points at the "-mini" repository, matching
# the "truncated version" wording in the pretty name.
HELLASWAG_IS_CONFIG = DatasetConfig(
    name="hellaswag-is",
    huggingface_id="EuroEval/hellaswag-is-mini",
    task=COMMON_SENSE,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the truncated version of the Icelandic common-sense reasoning "
        "dataset HellaSwag-is, translated from the English HellaSwag dataset"
    ),
)

# Unofficial multiple choice reading comprehension (MCRC) configuration for
# BeleBele-is.
# NOTE(review): the Hugging Face ID ends in "-mini", but unlike the other
# "-mini" entries in this module the pretty name does not say "truncated
# version" -- confirm whether that omission is intentional.
BELEBELE_IS_CONFIG = DatasetConfig(
    name="belebele-is",
    huggingface_id="EuroEval/belebele-is-mini",
    task=MCRC,
    languages=[IS],
    unofficial=True,
    pretty_name=(
        "the Icelandic multiple choice reading comprehension dataset BeleBele-is, "
        "translated from the English BeleBele dataset"
    ),
)