@comment{
  Conference paper from the APWeb-WAIM 2022 proceedings (Springer LNCS series).
  The machine-generated citation key is retained so existing citations of this
  entry keep resolving.
  TODO(review): the LNCS volume number is not recorded in this entry; add a
  volume field once confirmed against the publisher page.
  TODO(review): address holds only the publisher's country; the publisher's
  city is the conventional value. Confirm and refine.
}
@inproceedings{75f6f464286449d1bc96df9c6043499f,
  author    = {Xu, Chen and Yang, Yi and Pan, Qingfeng and Zhou, Hongfu},
  title     = {{ACF2}: Accelerating Checkpoint-Free Failure Recovery for Distributed Graph Processing},
  booktitle = {Web and Big Data - 6th International Joint Conference, {APWeb-WAIM} 2022, Proceedings},
  editor    = {Li, Bohan and Tao, Chuanqi and Yue, Lin and Han, Xuming and Calvanese, Diego and Amagasa, Toshiyuki},
  series    = {Lecture Notes in Computer Science},
  pages     = {45--59},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2023},
  doi       = {10.1007/978-3-031-25158-0_5},
  isbn      = {9783031251573},
  language  = {english},
  keywords  = {Checkpoint-free, Failure recovery, Graph processing},
  note      = {Publisher Copyright: {\textcopyright} 2023, The Author(s), under exclusive license to Springer Nature Switzerland AG.; 6th International Joint Conference on Asia-Pacific Web (APWeb) and Web-Age Information Management (WAIM), APWeb-WAIM 2022 ; Conference date: 25-11-2022 Through 27-11-2022},
  abstract  = {Iterative computation in distributed graph processing systems typically incurs a long runtime. Hence, it is crucial for graph processing to tolerate and quick recover from intermittent failures. Existing solutions can be categorized into checkpoint-based and checkpoint-free solution. The former writes checkpoints periodically during execution, which leads to significant overhead. Differently, the latter requires no checkpoint. Once failure happens, it reloads input data and resets the value of lost vertices directly. However, reloading input data involves repartitioning, which incurs additional overhead. Moreover, we observe that checkpoint-free solution cannot effectively handle failures for graph algorithms with topological mutations. To address these issues, we propose ACF2 with a partition-aware backup strategy and an incremental protocol. In particular, the partition-aware backup strategy backs up the sub-graphs of all nodes after initial partitioning. Once failure happens, the partition-aware backup strategy recovers the lost sub-graphs from the backups, and then resumes computation like checkpoint-free solution. To effectively handle failures involving topological mutations, the incremental protocol logs topological mutations during normal execution which would be exploited for recovery. We implement ACF2 based on Apache Giraph and our experiments show that ACF2 significantly outperforms existing solutions.},
}