% FastNER conference paper (ADMA 2023 proceedings, Springer LNCS).
% Cleaned up from a research-portal export:
%   - language/address were exported as CJK text; stored here as ASCII English.
%   - doi is stored bare (no LaTeX escaping); url-aware styles typeset it safely.
%   - acronyms in title/booktitle are brace-protected against style recasing.
%   - the exported "note" dump is split into copyright/eventtitle/eventdate so it
%     no longer prints verbatim in the bibliography (classic styles ignore these).
%   - address is the publisher's country as exported; the city is not known here.
@inproceedings{6d91cce936354d4886bf46633b1dd82b,
  author     = {Zhang, Yuming and Gao, Xiangxiang and Zhu, Wei and Wang, Xiaoling},
  title      = {{FastNER}: Speeding up Inferences for Named Entity Recognition Tasks},
  booktitle  = {Advanced Data Mining and Applications -- 19th International Conference, {ADMA} 2023, Proceedings},
  editor     = {Yang, Xiaochun and Wang, Bin and Suhartanto, Heru and Wang, Guoren and Jiang, Jing and Li, Bing and Zhu, Huaijie and Cui, Ningning},
  series     = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher  = {Springer Science and Business Media Deutschland GmbH},
  address    = {Germany},
  year       = {2023},
  pages      = {185--199},
  isbn       = {9783031466601},
  doi        = {10.1007/978-3-031-46661-8_13},
  language   = {English},
  keywords   = {Early Exiting, Inference speed-up, Pre-trained language models},
  abstract   = {BERT and its variants are the most performing models for named entity recognition (NER), a fundamental information extraction task. We must apply inference speedup methods for BERT-based NER models to be deployed in the industrial setting. Early exiting allows the model to use only the shallow layers to process easy samples, thus reducing the average latency. In this work, we introduce FastNER, a novel framework for early exiting with a BERT biaffine NER model, which supports both flat NER tasks and nested NER tasks. First, we introduce a convolutional bypass module to provide suitable features for the current layer{\textquoteright}s biaffine prediction head. This way, an intermediate layer can focus more on delivering high-quality semantic representations for the next layer. Second, we introduce a series of early exiting mechanisms for BERT biaffine model, which is the first in the literature. We conduct extensive experiments on 6 benchmark NER datasets, 3 of which are nested NER tasks. The experiments show that: (a) Our proposed convolutional bypass method can significantly improve the overall performances of the multi-exit BERT biaffine NER model. (b) our proposed early exiting mechanisms can effectively speed up the inference of BERT biaffine model. Comprehensive ablation studies are conducted and demonstrate the validity of our design for our FastNER framework.},
  eventtitle = {19th International Conference on Advanced Data Mining and Applications, {ADMA} 2023},
  eventdate  = {2023-08-21/2023-08-23},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2023.},
}