@inproceedings{6ae46727a649474692e9ca58e863554c,
  title         = {{Large Language Models} are Good Annotators for {Type-aware} Data Augmentation in {Grammatical Error Correction}},
  abstract      = {Large Language Models (LLMs) have achieved outstanding performance across various NLP tasks. Grammatical Error Correction (GEC) is a task aiming at automatically correcting grammatical errors in text, but it encounters a severe shortage of annotated data. Researchers have tried to make full use of the generalization capabilities of LLMs and prompt them to correct erroneous sentences, which however results in unexpected over-correction issues. In this paper, we rethink the role of LLMs in GEC tasks and propose a method, namely TypeDA, considering LLMs as the annotators for type-aware data augmentation in GEC tasks. Different from the existing data augmentation methods, our method prevents in-distribution corruption and is able to generate sentences with multi-granularity error types. Our experiments verify that our method can generally improve the GEC performance of different backbone models with only a small amount of augmented data. Further analyses verify the high consistency and diversity of the pseudo data generated via our method. Our code can be accessed via the provided URL.},
  author        = {Li, Xinyuan and Lan, Yunshi},
  editor        = {Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and {Di Eugenio}, Barbara and Schockaert, Steven},
  booktitle     = {Proceedings of the 31st International Conference on Computational Linguistics},
  series        = {Proceedings - International Conference on Computational Linguistics, COLING},
  publisher     = {Association for Computational Linguistics (ACL)},
  address       = {Australia},
  year          = {2025},
  pages         = {199--213},
  language      = {english},
  note          = {Publisher Copyright: {\textcopyright} 2025 Association for Computational Linguistics.; 31st International Conference on Computational Linguistics, COLING 2025 ; Conference date: 19-01-2025 Through 24-01-2025},
  internal-note = {address translated from Chinese "澳大利亚" (Australia) -- verify: for @inproceedings, address should be the publisher's city, not the conference venue; COLING 2025 venue should be confirmed against the ACL Anthology},
}