@inproceedings{29723fbbcda645a59f071a64ce095b47,
  title     = {{SAI}: Latency-Aware Satellite Edge {LAM} Inference with Looped {Transformer}},
  abstract  = {The rapid advancements in computing and communication capabilities of Low Earth Orbit (LEO) satellites have made it feasible to execute complex and collaborative in-orbit computation missions. Transformer-based large AI models (LAMs), known for their exceptional performance in in-context learning (ICL) and prompt-based reasoning, have attracted significant attention, providing powerful intelligence across sectors such as industry and aerospace. However, the significant parameter volume of LAMs poses a substantial challenge for direct deployment on satellites with constrained computing power and energy provision. To address this, the looped Transformer model reduces parameter requirements through layerwise parameter sharing, achieving performance comparable to vanilla Transformer-based LAMs in ICL tasks. Despite this efficiency, the limited and heterogeneous space-borne computing and storage capabilities complicate the orchestration for balanced workload allocation during multi-satellite cooperation. In this paper, we propose SAI, a collaborative multi-satellite space AI system that exploits the memory efficiency of the looped Transformer and the inherent parallelism in batch data processing. SAI enables accelerated on-satellite inference by integrating heterogeneous onboard resources and introducing a novel hybrid approach combining data and pipeline parallelism. This approach supports cross-satellite cooperation with parallelism planning and asynchronous inter-batch overlapping, significantly reducing inference latency and enhancing resource efficiency. Furthermore, SAI optimizes inference latency by formulating it as a shortest-path problem, effectively solved via Dijkstra's algorithm. Extensive evaluations demonstrate SAI's superior performance in reducing inference latency and runtime memory usage compared to existing baselines.},
  keywords  = {In-context Learning, Satellite, Space Computing, Transformer},
  author    = {Yuan, Honggang and Wang, Zixin and Jiang, Yuning and Liu, Xin and Shi, Yuanming and Wang, Ting},
  editor    = {Valenti, Matthew and Reed, David and Torres, Melissa},
  booktitle = {{ICC} 2025 - {IEEE} International Conference on Communications},
  series    = {IEEE International Conference on Communications},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {2472--2477},
  year      = {2025},
  doi       = {10.1109/ICC52391.2025.11161072},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Communications, ICC 2025 ; Conference date: 08-06-2025 Through 12-06-2025},
}