% Conference paper: "SQT: Debiased Visual Question Answering via Shuffling
% Question Types", in the ICME 2023 proceedings (pages 600--605).
% All field values are kept ASCII/English so the entry is safe under classic
% 8-bit BibTeX as well as Biber.
% NOTE(review): `address` currently holds a country; BibTeX expects the
% publisher's city there -- confirm the city and replace if known.
@inproceedings{7684480053584e589560972b3a72d1b7,
  author    = {Huai, Tianyu and Yang, Shuwen and Zhang, Junhang and Wang, Guoan and Yu, Xinru and Ma, Tianlong and He, Liang},
  title     = {{SQT}: Debiased Visual Question Answering via Shuffling Question Types},
  booktitle = {Proceedings - 2023 IEEE International Conference on Multimedia and Expo, ICME 2023},
  series    = {Proceedings - IEEE International Conference on Multimedia and Expo},
  pages     = {600--605},
  year      = {2023},
  publisher = {IEEE Computer Society},
  address   = {United States},
  doi       = {10.1109/ICME55011.2023.00109},
  language  = {English},
  keywords  = {De-biasing, Self-supervised, Visual question answering},
  note      = {2023 IEEE International Conference on Multimedia and Expo, ICME 2023, 10--14 July 2023. Publisher copyright: {\textcopyright} 2023 IEEE.},
  abstract  = {Visual Question Answering (VQA) aims to obtain answers through image-question pairs. Nowadays, the VQA model tends to get answers only through questions, ignoring the information in the images. This phenomenon is caused by bias. As indicated by previous studies, the bias in VQA mainly comes from text modality. Our analysis of bias suggests that the question type is a crucial factor in bias formation. To interrupt the shortcut from question type to answer for de-biasing, we propose a self-supervised method for Shuffling Question Types (SQT) to reduce bias from text modality, which overcomes the prior language problem by mitigating the question-to-answer bias without introducing external annotations. Moreover, we propose a new objective function for negative samples. Experimental results show that our approach can achieve 61.76\% accuracy on the VQA-CP v2 dataset, which outperforms the state-of-the-art in both self-supervised and supervised methods.},
}