% QDTSynth paper from the 63rd Annual Meeting of the ACL (ACL 2025), long-papers volume.
% Maintenance notes (text outside an entry is ignored by BibTeX and Biber):
%   - The citation key is kept as exported so existing \cite commands keep working.
%   - title: QDTSynth and LLMs are brace-protected so sentence-casing styles keep their capitals.
%   - booktitle: full proceedings title; the export had only the volume label "Long Papers".
%   - editor: the last editor's given names were wrapped in escaped braces that printed literally.
%   - language: was the Chinese word for "English"; replaced by an ASCII language identifier.
%   - address: the export gave "Australia" in Chinese; ACL 2025 took place in Vienna, Austria.
%     NOTE(review): confirm the location against the ACL Anthology record for this paper.
@inproceedings{5e655ef243ad4768bea5caa8bfa55cb2,
  author    = {Wang, Lei and Zuo, Ruobing and He, Gaolei and Wang, Jianlin and Yang, Zhengfeng},
  title     = {{QDTSynth}: Quality-Driven Formal Theorem Synthesis for Enhancing Proving Performance of {LLMs}},
  booktitle = {Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  series    = {Proceedings of the Annual Meeting of the Association for Computational Linguistics},
  editor    = {Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {Vienna, Austria},
  year      = {2025},
  pages     = {14683--14698},
  language  = {english},
  note      = {Publisher Copyright: {\textcopyright} 2025 Association for Computational Linguistics. 63rd Annual Meeting of the Association for Computational Linguistics, ACL 2025. Conference date: 27-07-2025 through 01-08-2025},
  abstract  = {Automated Theorem Proving is an important and challenging task. Although large language models (LLMs) have demonstrated remarkable potential in mathematical reasoning, their performance in formal theorem proving remains constrained by the scarcity of high-quality supervised fine-tuning (SFT) data. To address this limitation, we propose a Quality-Driven Theorem Synthesis method (QDTSynth) in Lean4. During the statement synthesis, we enhance Monte Carlo Tree Search (MCTS) with an adaptive adjustment mechanism that dynamically optimizes the search strategy based on the synthesis of statements. In addition, we propose diversity screening and the self-assessment method to select theorems that exhibit both diversity and high quality from the initially synthetic statements, enabling the synthesis of a high-quality Lean4 theorem dataset. After fine-tuning three open-source large language models on our synthetic dataset, experiments on the miniF2F benchmark demonstrate that QDTSynth significantly improves the performance of various open-source LLMs in theorem proving tasks. Our work offers a promising new direction for the future synthesis of high-quality formal mathematical theorems.},
}