"""All Dutch dataset configurations used in EuroEval."""
from ..data_models import DatasetConfig
from ..languages import NL
from ..tasks import COMMON_SENSE, KNOW, LA, MCRC, NER, RC, SENT, SUMM
### Official datasets ###
# Dutch Social: sentiment classification, loaded from the truncated ("mini")
# Hugging Face repository.
DUTCH_SOCIAL_CONFIG = DatasetConfig(
    name="dutch-social",
    pretty_name=(
        "the truncated version of the Dutch sentiment classification "
        "dataset Dutch Social"
    ),
    huggingface_id="EuroEval/dutch-social-mini",
    task=SENT,
    languages=[NL],
)
# ScaLA (Dutch part): linguistic acceptability.
SCALA_NL_CONFIG = DatasetConfig(
    name="scala-nl",
    pretty_name=("the Dutch part of the linguistic acceptability dataset ScaLA"),
    huggingface_id="EuroEval/scala-nl",
    task=LA,
    languages=[NL],
)
# CoNLL 2002 (Dutch part): named entity recognition, loaded from the truncated
# ("mini") Hugging Face repository.
CONLL_NL_CONFIG = DatasetConfig(
    name="conll-nl",
    pretty_name=(
        "the Dutch part of the truncated version of the named entity "
        "recognition dataset CoNLL 2002"
    ),
    huggingface_id="EuroEval/conll-nl-mini",
    task=NER,
    languages=[NL],
)
# SQuAD-nl: reading comprehension, described as a translation of the English
# SQuAD dataset; loaded from the truncated ("mini") Hugging Face repository.
SQUAD_NL_CONFIG = DatasetConfig(
    name="squad-nl",
    pretty_name=(
        "the truncated version of the Dutch reading comprehension dataset "
        "SQuAD-nl, translated from the English SQuAD dataset"
    ),
    huggingface_id="EuroEval/squad-nl-v2-mini",
    task=RC,
    languages=[NL],
)
# WikiLingua (Dutch part): summarisation, loaded from the truncated ("mini")
# Hugging Face repository.
WIKI_LINGUA_NL_CONFIG = DatasetConfig(
    name="wiki-lingua-nl",
    pretty_name=(
        "the Dutch part of the truncated version of the summarisation dataset "
        "WikiLingua"
    ),
    huggingface_id="EuroEval/wiki-lingua-nl-mini",
    task=SUMM,
    languages=[NL],
)
# MMLU-nl: knowledge benchmark, described as a translation of the English MMLU
# dataset; loaded from the truncated ("mini") Hugging Face repository.
MMLU_NL_CONFIG = DatasetConfig(
    name="mmlu-nl",
    pretty_name=(
        "the truncated version of the Dutch knowledge dataset MMLU-nl, "
        "translated from the English MMLU dataset"
    ),
    huggingface_id="EuroEval/mmlu-nl-mini",
    task=KNOW,
    languages=[NL],
)
# HellaSwag-nl: common-sense reasoning, described as a translation of the
# English HellaSwag dataset; loaded from the truncated ("mini") repository.
HELLASWAG_NL_CONFIG = DatasetConfig(
    name="hellaswag-nl",
    pretty_name=(
        "the truncated version of the Dutch common-sense reasoning dataset "
        "HellaSwag-nl, translated from the English HellaSwag dataset"
    ),
    huggingface_id="EuroEval/hellaswag-nl-mini",
    task=COMMON_SENSE,
    languages=[NL],
)
### Unofficial datasets ###
# DBRD: sentiment classification, flagged as unofficial. It passes an explicit
# two-label set and Dutch prompt words for those labels.
# NOTE(review): presumably these replace defaults supplied by the SENT task —
# confirm against DatasetConfig.
DBRD_CONFIG = DatasetConfig(
    name="dbrd",
    pretty_name=(
        "the truncated version of the Dutch sentiment classification "
        "dataset DBRD"
    ),
    huggingface_id="EuroEval/dbrd-mini",
    task=SENT,
    languages=[NL],
    _labels=["negative", "positive"],
    _prompt_label_mapping={"positive": "positief", "negative": "negatief"},
    unofficial=True,
)
# Dutch CoLA: linguistic acceptability, flagged as unofficial. The pretty name
# describes this as the truncated version of the dataset.
DUTCH_COLA_CONFIG = DatasetConfig(
    name="dutch-cola",
    pretty_name=(
        "the truncated version of the Dutch linguistic acceptability dataset "
        "Dutch CoLA"
    ),
    huggingface_id="EuroEval/dutch-cola",
    task=LA,
    languages=[NL],
    unofficial=True,
)
# Dutch CoLA ("full" repository): linguistic acceptability, flagged as
# unofficial.
DUTCH_COLA_FULL_CONFIG = DatasetConfig(
    name="dutch-cola-full",
    pretty_name=("the Dutch linguistic acceptability dataset Dutch CoLA"),
    huggingface_id="EuroEval/dutch-cola-full",
    task=LA,
    languages=[NL],
    unofficial=True,
)
# ARC-nl: knowledge benchmark, described as a translation of the English ARC
# dataset; flagged as unofficial and loaded from the truncated ("mini")
# Hugging Face repository.
ARC_NL_CONFIG = DatasetConfig(
    name="arc-nl",
    pretty_name=(
        "the truncated version of the Dutch knowledge dataset ARC-nl, "
        "translated from the English ARC dataset"
    ),
    huggingface_id="EuroEval/arc-nl-mini",
    task=KNOW,
    languages=[NL],
    unofficial=True,
)
# BeleBele-nl: multiple choice reading comprehension, described as a
# translation of the English BeleBele dataset; flagged as unofficial.
# NOTE(review): the Hugging Face id ends in "-mini" but the pretty name does
# not say "truncated" like the other "-mini" entries — confirm which is right.
BELEBELE_NL_CONFIG = DatasetConfig(
    name="belebele-nl",
    pretty_name=(
        "the Dutch multiple choice reading comprehension dataset BeleBele-nl, "
        "translated from the English BeleBele dataset"
    ),
    huggingface_id="EuroEval/belebele-nl-mini",
    task=MCRC,
    languages=[NL],
    unofficial=True,
)
|