% Conference paper on AQapprox, published in the WISE 2020 proceedings
% (Lecture Notes in Computer Science series, Springer).
% NOTE(review): `address` holds the country exactly as exported (translated
% to English); the publisher's city is not present in this record -- confirm
% against the publisher page and replace if a city is wanted.
% NOTE(review): the abstract is kept verbatim from the export.
@inproceedings{41e02fdcb5ed44ed9236a077d93a2257,
  author    = {Wu, Han and Wang, Xiaoling and Lu, Xingjian},
  title     = {{AQapprox}: Aggregation Queries Approximation with Distribution-Aware Online Sampling},
  booktitle = {Web Information Systems Engineering -- {WISE} 2020 -- 21st International Conference, Proceedings},
  editor    = {Huang, Zhisheng and Beek, Wouter and Wang, Hua and Zhang, Yanchun and Zhou, Rui},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2020},
  pages     = {404--416},
  doi       = {10.1007/978-3-030-62008-0_28},
  isbn      = {9783030620073},
  language  = {English},
  keywords  = {AQP, Distribution-aware approximation, Probabilistic map},
  abstract  = {Approximate query processing (AQP) is an effective way to provide approximate results for SQL queries, which relaxing accuracy in exchange for higher processing speed. In sampling-based AQP techniques, random sampling works well for uniformly distributed data but performs poorly on skewed data. To address this problem, we propose a distribution-aware approximation framework called AQapprox (aggregation queries approximation), to approximate queries more efficiently and accurately by extending Sapprox. We construct a probabilistic Map, which records the occurrences of sub-datasets on categorical columns and related statistics on numerical columns at each segment of the whole dataset. When a query arrives, AQapprox will combine Map and adaptively use different sampling methods based on the distribution. Experimental results on both real and synthetic datasets show that AQapprox can achieve a speedup by up to 5.9 for skewed data, 64 for uniform data over Sapprox, and has higher accuracy on multi-column queries.},
  note      = {Publisher Copyright: {\textcopyright} 2020, Springer Nature Switzerland AG.; 21st International Conference on Web Information Systems Engineering, WISE 2020 ; Conference date: 20-10-2020 Through 24-10-2020},
}