@inproceedings{dd9f477f66a34dcabac48e257b2aaaab,
title = "Rethinking the Learning Paradigm for Dynamic Facial Expression Recognition",
abstract = "Dynamic Facial Expression Recognition (DFER) is a rapidly developing field that focuses on recognizing facial expressions in video format. Previous research has considered non-target frames as noisy frames, but we propose that it should be treated as a weakly supervised problem. We also identify the imbalance of short- and long-term temporal relationships in DFER. Therefore, we introduce the Multi-3D Dynamic Facial Expression Learning (M3DFEL) framework, which utilizes Multi-Instance Learning (MIL) to handle inexact labels. M3DFEL generates 3D-instances to model the strong short-term temporal relationship and utilizes 3DCNNs for feature extraction. The Dynamic Long-term Instance Aggregation Module (DLIAM) is then utilized to learn the long-term temporal relationships and dynamically aggregate the instances. Our experiments on DFEW and FERV39K datasets show that M3DFEL outperforms existing state-of-the-art approaches with a vanilla R3D18 backbone. The source code is available at https://github.com/faceeyes/M3DFEL.",
keywords = "Humans: Face, body, gesture, movement, pose",
author = "Hanyang Wang and Bo Li and Shuang Wu and Siyuan Shen and Feng Liu and Shouhong Ding and Aimin Zhou",
note = "Publisher Copyright: {\textcopyright} 2023 IEEE.; 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023 ; Conference date: 18-06-2023 Through 22-06-2023",
year = "2023",
doi = "10.1109/CVPR52729.2023.01722",
language = "英语",
series = "Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition",
publisher = "IEEE Computer Society",
pages = "17958--17968",
booktitle = "Proceedings - 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition, CVPR 2023",
address = "美国",
}