% Conference paper in the ICIP 2022 proceedings (IEEE International Conference
% on Image Processing). Title is stored in Title Case so the bibliography style
% controls final casing; acronyms are brace-protected; language and address are
% given in English/ASCII so the entry is safe for classic 8-bit BibTeX.
@inproceedings{9640caeab23a4074bb3e9095c1cb5874,
  author    = {Zhou, Ejian and Wu, Xingjiao and Xiao, Luwei and Du, Xiangcheng and Ma, Tianlong and He, Liang},
  title     = {Document Layout Analysis via Positional Encoding},
  booktitle = {2022 {IEEE} International Conference on Image Processing, {ICIP} 2022 - Proceedings},
  series    = {Proceedings - International Conference on Image Processing, {ICIP}},
  pages     = {1156--1160},
  year      = {2022},
  month     = oct,
  publisher = {{IEEE} Computer Society},
  address   = {United States},
  doi       = {10.1109/ICIP46576.2022.9897330},
  language  = {English},
  keywords  = {Document layout analysis, bounding box, deep learning, position-encoding},
  abstract  = {Document layout analysis plays a vital role in computer vision research. Current document layout analysis methods mostly use pixel-based classification for document layout analysis. However, the method based on pixel classification is insufficient for maintaining the continuity of the classification area. In this paper, we propose a document layout analysis method based on positional encoding and bounding box specification. We maintain the continuity of the analysis area by constructing a document layout analysis framework based on the bounding box. In addition, we also integrate a positional encoding module in the framework to maintain the detailed information in the document layout analysis and modeling process. Experimental results prove that our proposed method has achieved state-of-the-art results.},
  note      = {29th {IEEE} International Conference on Image Processing, {ICIP} 2022, 16--19 October 2022. Publisher Copyright: {\textcopyright} 2022 {IEEE}.},
}