% Conference paper (ICME 2025): LFNet, LiDAR-fisheye fusion for 3D semantic segmentation.
% NOTE(review): author "Tan Xin" is parsed as given name "Tan", family name "Xin";
%   confirm the intended name order against the publisher record for the DOI below.
% NOTE(review): address holds a country rather than a publisher city; confirm or
%   replace with the publisher's city.
@inproceedings{aa7a990c93ac45349ffdf70a94ac8853,
  author    = {Weijian Zhang and Zhiwei Zhang and Tianfang Sun and Zhizhong Zhang and Tan Xin and Yuan Xie},
  title     = {{LFNet}: Cross-Modal {LiDAR}-Fisheye Fusion Network for {3D} Semantic Segmentation},
  booktitle = {2025 {IEEE} International Conference on Multimedia and Expo},
  series    = {Proceedings - {IEEE} International Conference on Multimedia and Expo},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/ICME59968.2025.11209024},
  language  = {English},
  keywords  = {Fisheye Image, LiDAR, Point Cloud, Semantic Segmentation, Transformer},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Multimedia and Expo, ICME 2025 ; Conference date: 30-06-2025 Through 04-07-2025},
  abstract  = {Cross-modal fusion, which leverages images to enhance 3D semantic segmentation, has demonstrated significant effectiveness due to the complementary nature of heterogeneous data. However, existing approaches are limited to pinhole images, leaving fisheye images largely unexplored. In this paper, we introduce the LiDAR-Fisheye Fusion Network (LFNet), a dual-transformer architecture designed for cross-modal fusion (CMF) across hierarchical multi-scale layers. The 3D Transformer extracts point-level features from LiDAR data, while the pre-trained 2D Transformer extracts patch-level features from fisheye images. The CMF module comprises two key components: Local Fusion (LoF) and Global Fusion (GoF). The LoF module interpolates patch-level features to pixel-level for accurate feature alignment and computes precise point-to-pixel mappings for gated fusion. Meanwhile, the GoF module enables points to capture a holistic understanding of the scene via a cross-modal attention mechanism. Experimental results highlight the potential of fisheye images as a promising modality to complement LiDAR data in 3D semantic segmentation. The code will be available at https://github.com/wjzhang642/LFNet.},
}