-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrtp-lx_metadata.json
152 lines (152 loc) · 5.71 KB
/
rtp-lx_metadata.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
{
"name": "rtp-lx",
"title": "",
"description": "RTP-LX is a dataset of toxic prompts and passages in 26 languages. It is designed for toxic language detection. Each prompt has been transcreated and annotated by native speakers. The dataset includes both typical offensive content and manually designed prompts containing culture-specific offensive content.",
"doi": "",
"version": "0.0.1",
"created": "September 2023",
"licenses": [
{
"title": "Creative Commons Attribution-NonCommercial 4.0",
"name": "CC-BY-NC-4.0",
"path": "https://creativecommons.org/licenses/by-nc/4.0/"
},
{
"title": "MIT License",
"name": "MIT",
"path": "https://opensource.org/licenses/MIT"
},
{
"title": "Apache Software License 2.0",
"name": "Apache-2.0",
"path": "https://opensource.org/licenses/Apache-2.0"
}
],
"sources": [],
"resources": [
{
"name": "RTP-LX/*.json",
"description": "RTP-LX data files for each language",
"path": "RTP-LX",
"doi": "",
"format": "JSON",
"mediatype": "application/json",
"bytes": "",
"date": "September 2023",
"encoding": "",
"version": "0.0.1",
"language": "",
"schema": [
{
"name": "Prompt",
"type": "string",
"description": "The prompt"
},
{
"name": "Completion",
"type": "string",
"description": "A completion based on the prompt"
},
{
"name": "Index",
"type": "integer",
"description": ""
},
{
"name": "Locale",
"type": "enum",
"description": "Main language of the prompt/completion"
},
{
"name": "Dialect",
"type": "string",
"description": "Dialect of the prompt/completion"
},
{
"name": "OriginalMetadata",
"type": "Json",
"description": "Any of the original metadata, if the file corresponds to the original RTP"
},
{
"name": "PromptAnnotations",
"type": "Json",
"description": "The annotations for the prompt. All are on a Likert scale (1-5).\nCategories:\n- Toxicity\n- IdentityAttack\n- Bias\n- Microaggression\n- Violence\n- SelfHarm\n- Insult\n- SexualContent\n"
},
{
"name": "PromptAnnotationsRaw",
"type": "Json",
"description": "The raw annotations for the prompt, as an array. Each entry is on a Likert scale (1-5).\nCategories:\n- Toxicity\n- IdentityAttack\n- Bias\n- Microaggression\n- Violence\n- SelfHarm\n- Insult\n- SexualContent\n"
},
{
"name": "CompletionAnnotations",
"type": "Json",
"description": "The annotations for the completion. All are on a Likert scale (1-5).\nCategories:\n- Toxicity\n- IdentityAttack\n- Bias\n- Microaggression\n- Violence\n- SelfHarm\n- Insult\n- SexualContent\n"
},
{
"name": "CompletionAnnotationsRaw",
"type": "Json",
"description": "The raw annotations for the completion, as an array. Each entry is on a Likert scale (1-5).\nCategories:\n- Toxicity\n- IdentityAttack\n- Bias\n- Microaggression\n- Violence\n- SelfHarm\n- Insult\n- SexualContent\n"
}
],
"dialect": {}
}
],
"contributors": [
{
"title": "Applied Scientist",
"path": "",
"email": "adewynter@microsoft.com",
"role": "author"
}
],
"privacy": [
{
"notice": {
"path": "./README.md",
"description": "Offensive content warning"
},
"useTerms": {
"path": "./README.md",
"description": "This data may not be used in any system that could cause harm, or used in surveillance applications."
},
"sensitivity": {
"description": "This data is extremely toxic and only intended for toxic language detection",
"types": []
},
"confidentiality": {
"path": "",
"description": ""
},
"assessments": []
}
],
"security": [],
"procedures": {
"collection": [],
"processing": [],
"update": [
{
"contributor": [
{
"title": "Applied Scientist",
"path": "",
"email": "adewynter@microsoft.com",
"role": "author"
}
],
"path": "RTP-LX/*.json",
"description": "Four updates are planned, in addition to any maintenance updates, to add the remaining languages and the passages."
}
]
},
"use": [
{
"description": "Tests for service block rates",
"examples": []
},
{
"description": "Toxicity analysis in a multilingual scenario",
"examples": []
}
]
}