@inproceedings{lopezcortez2024gmegexp,
  author        = {L{\'o}pez Cortez, Magal{\'\i} and Norris, Mark and Duman, Steve},
  title         = {{GMEG-EXP}: A Dataset of Human- and {LLM}-Generated Explanations of Grammatical and Fluency Edits},
  booktitle     = {Joint International Conference on Computational Linguistics, Language Resources, and Evaluation},
  year          = {2024},
  month         = may,
  abstract      = {Recent work has explored the ability of large language models (LLMs) to generate explanations of existing labeled data. In this work, we investigate the ability of LLMs to explain revisions in sentences. We introduce a new dataset demonstrating a novel task, which we call explaining text revisions. We collected human- and LLM-generated explanations of grammatical and fluency edits and defined criteria for the human evaluation of the explanations along three dimensions: Coverage, Informativeness, and Correctness. The results of a side-by-side evaluation show an Overall preference for human explanations, but there are many instances in which annotators show no preference. Annotators prefer human-generated explanations for Informativeness and Correctness, but they show no preference for Coverage. We also examined the extent to which the number of revisions in a sentence influences annotators' Overall preference for the explanations. We found that the preference for human explanations increases as the number of revisions in the sentence increases. Additionally, we show that the Overall preference for human explanations depends on the type of error being explained. We discuss explanation styles based on a qualitative analysis of 300 explanations. We release our dataset and annotation guidelines to encourage future research.},
  internal-note = {TODO: add the doi, url, and eprint fields once the real values are available; the former placeholder values were removed so they are not printed in bibliographies. The citation key replaces an unusable placeholder key.},
}
Coming soon!
Coming soon!
Domains are `fce`, `wiki`, and `yahoo`.
Data from the `yahoo` domain was sampled from the Yahoo Answers corpus, which was created from L6 - Yahoo! Answers Comprehensive Questions and Answers version 1.0. This Yahoo Answers corpus can be requested free of charge for research purposes. To access data from the `yahoo` domain, you must first gain access to this Yahoo Answers corpus.
Once you have gained access to the L6 corpus, please forward the acknowledgment to mark.norris@grammarly.com, along with your affiliation and a short description of how you will be using the data, and we will provide access to data from the `yahoo` domain.
Coming soon!