% Conference paper in the ICANN 2017 proceedings (Springer, Lecture Notes in Computer Science series).
@inproceedings{c87a606093144a44b3d589c42b054f85,
  author    = {Ni, Mengjun and Yang, Jing and Lin, Xin and He, Liang},
  title     = {Reducing unknown unknowns with guidance in image caption},
  editor    = {Lintas, Alessandra and Villa, Alessandro E. and Rovetta, Stefano and Verschure, Paul F.},
  booktitle = {Artificial Neural Networks and Machine Learning -- {ICANN} 2017 - 26th International Conference on Artificial Neural Networks, Proceedings},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2017},
  pages     = {547--555},
  doi       = {10.1007/978-3-319-68612-7_62},
  isbn      = {9783319686110},
  language  = {English},
  keywords  = {Commonsense, Crowdsourcing, Image caption, Recurrent neural network},
  abstract  = {Deep recurrent models applied in Image Caption, which link up computer vision and natural language processing, have achieved excellent results enabling automatically generating natural sentences describing an image. However, the mismatch of sample distribution between training data and the open world may leads to tons of hiding-in-dark Unknown Unknowns (UUs). And such errors may greatly harm the correctness of generated captions. In this paper, we present a framework targeting on UUs reduction and model optimization based on recurrently training with small amounts of external data detected under assistance of crowd commonsense. We demonstrate and analyze our method with currently state-of-the-art image-to-text model. Aiming at reducing the number of UUs in generated captions, we obtain over 12\% of UUs reduction and reinforcement of model cognition on these scenes.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 26th International Conference on Artificial Neural Networks, ICANN 2017 ; Conference date: 11-09-2017 Through 14-09-2017},
}