% Conference paper "Euge" from the APWeb-WAIM 2020 proceedings (Springer, LNCS series).
% Cleaned from an auto-exported record:
%   - doi is stored bare (no LaTeX escaping of the underscore);
%   - language/address were translated from Chinese to ASCII English;
%   - acronyms and the system name in the title are brace-protected against style recasing;
%   - author/editor names use the unambiguous "Last, First" form.
% NOTE(review): address holds a country, not the publisher's city -- confirm the city and replace.
% NOTE(review): the record carries no LNCS volume number -- TODO add `volume` once confirmed.
@inproceedings{08b3581c77e44f229cef9f206f75aeed,
  author    = {Chen, Qihang and Ding, Guangyao and Xu, Chen and Qian, Weining and Zhou, Aoying},
  title     = {{Euge}: Effective Utilization of {GPU} Resources for Serving {DNN}-Based Video Analysis},
  booktitle = {Web and Big Data - 4th International Joint Conference, {APWeb-WAIM} 2020, Proceedings},
  editor    = {Wang, Xin and Zhang, Rui and Lee, Young-Koo and Sun, Le and Moon, Yang-Sae},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2020},
  pages     = {523--528},
  doi       = {10.1007/978-3-030-60290-1_40},
  isbn      = {9783030602895},
  language  = {English},
  keywords  = {DNN, GPU, MPS, Model sharing},
  abstract  = {Deep Neural Network (DNN) has been widely adopted in video analysis application. The computation involved in DNN is more efficient on GPUs than on CPUs. However, recent serving systems involve the low utilization of GPU, due to limited process parallelism and storage overhead of DNN model. We propose Euge, which introduces multi-process service (MPS) and model sharing technology to support effective utilization of GPU. With MPS technology, multiple processes overcome the obstacle of GPU context and execute DNN-based video analysis on one GPU in parallel. Furthermore, by sharing the DNN-based model among threads within a process, Euge reduces the GPU memory overhead. We implement Euge on Spark and demonstrate the performance of vehicle detection workload.},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 4th Asia-Pacific Web and Web-Age Information Management, Joint Conference on Web and Big Data, APWeb-WAIM 2020 ; Conference date: 18-09-2020 Through 20-09-2020},
}