% Conference paper in the LREC-COLING 2024 main conference proceedings.
% The hash-style key is kept unchanged so existing \cite commands still resolve.
% The note field holds export boilerplate (copyright, event name and dates);
% NOTE(review): consider trimming it if it should not print in the bibliography.
@inproceedings{e2a9738678b7493280e2eb21c52fdc24,
  author    = {Sun, Yuhong and Yin, Zhangyue and Guo, Qipeng and Wu, Jiawen and Qiu, Xipeng and Zhao, Hui},
  title     = {Benchmarking Hallucination in Large Language Models based on Unanswerable Math Word Problem},
  booktitle = {2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation, {LREC-COLING} 2024 - Main Conference Proceedings},
  editor    = {Calzolari, Nicoletta and Kan, Min-Yen and Hoste, Veronique and Lenci, Alessandro and Sakti, Sakriani and Xue, Nianwen},
  pages     = {2178--2188},
  publisher = {European Language Resources Association (ELRA)},
  year      = {2024},
  month     = may,
  language  = {English},
  keywords  = {Dataset, Hallucination, Large Language Model, Math Word Problem},
  abstract  = {Large language models (LLMs) are highly effective in various natural language processing (NLP) tasks. However, they are susceptible to producing unreliable conjectures in ambiguous contexts called hallucination. This paper presents a new method for evaluating LLM hallucination in Question Answering (QA) based on the unanswerable math word problem (MWP). To support this approach, we innovatively develop a dataset called Unanswerable Math Word Problem (UMWP) which comprises 5200 questions across five categories. We developed an evaluation methodology combining text similarity and mathematical expression detection to determine whether LLM considers the question unanswerable. The results of extensive experiments conducted on 31 LLMs, including GPT-3, InstructGPT, LLaMA, and Claude, demonstrate that in-context learning and reinforcement learning with human feedback (RLHF) training significantly enhance the model's ability to avoid hallucination. We show that utilizing MWP is a reliable and effective approach to assess hallucination. Our code and data are available at https://github.com/Yuki-Asuuna/UMWP.},
  note      = {Publisher Copyright: {\textcopyright} 2024 ELRA Language Resource Association: CC BY-NC 4.0.; Joint 30th International Conference on Computational Linguistics and 14th International Conference on Language Resources and Evaluation, LREC-COLING 2024 ; Conference date: 20-05-2024 Through 25-05-2024},
}