% Conference paper in the ICDM 2015 proceedings (see booktitle / doi).
% Cleanup applied to this record:
%   - language/address held locale-translated, non-ASCII values; they are now
%     plain ASCII English so the entry also works with classic 8-bit BibTeX.
%   - proper nouns and acronyms are brace-protected against style recasing.
%   - author/editor names use the unambiguous "Last, First" form.
% NOTE(review): year/month/day say 2016-01-05 while the note gives conference
%   dates in Nov 2015 -- presumably the online publication date; confirm
%   before changing.
% NOTE(review): address conventionally holds the publisher's city; only the
%   country is known from this record -- fill in the city if available.
@inproceedings{4d1e9b7349f64383bdafbfebd9cd17a2,
  author    = {Zhang, Dell and Wang, Jun and Zhao, Xiaoxue and Wang, Xiaoling},
  title     = {A {Bayesian} hierarchical model for comparing average {F1} scores},
  booktitle = {Proceedings - 15th {IEEE} International Conference on Data Mining, {ICDM} 2015},
  series    = {Proceedings - {IEEE} International Conference on Data Mining, {ICDM}},
  editor    = {Aggarwal, Charu and Zhou, Zhi-Hua and Tuzhilin, Alexander and Xiong, Hui and Wu, Xindong},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {589--598},
  year      = {2016},
  month     = jan,
  day       = {5},
  doi       = {10.1109/ICDM.2015.44},
  language  = {English},
  keywords  = {Bayesian inference, Hypothesis testing, Model comparison, Performance evaluation, Text classification},
  abstract  = {In multi-class text classification, the performance (effectiveness) of a classifier is usually measured by micro-averaged and macro-averaged F1 scores. However, the scores themselves do not tell us how reliable they are in terms of forecasting the classifier's future performance on unseen data. In this paper, we propose a novel approach to explicitly modelling the uncertainty of average F1 scores through Bayesian reasoning, and demonstrate that it can provide much more comprehensive performance comparison between text classifiers than the traditional frequentist null hypothesis significance testing (NHST).},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 15th IEEE International Conference on Data Mining, ICDM 2015 ; Conference date: 14-11-2015 Through 17-11-2015},
}