% Conference paper: "Optimizing top-K Retrieval" (WAIM 2014 proceedings,
% Lecture Notes in Computer Science). The citation key is the exporter's
% opaque identifier and is kept as-is so existing \cite commands still resolve.
%
% Review notes:
%   - abstract: the approximation factor was missing in the exported text;
%     "$1 - 1/e$" is the standard greedy bound for monotone submodular
%     maximisation. TODO confirm against the publisher's abstract.
%   - address: holds a country (translated from the exporter's localized
%     value), not the publisher's city. TODO replace with the city if known.
%   - volume: the LNCS volume number is not recorded here. TODO add.
@inproceedings{57c3938648da427da7605d83da996b48,
  author    = {Sha, Chaofeng and Wang, Keqiang and Zhang, Dell and Wang, Xiaoling and Zhou, Aoying},
  title     = {Optimizing top-K Retrieval: Submodularity analysis and search strategies},
  booktitle = {Web-Age Information Management - 15th International Conference, WAIM 2014, Proceedings},
  series    = {Lecture Notes in Computer Science},
  pages     = {18--29},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2014},
  isbn      = {9783319080093},
  doi       = {10.1007/978-3-319-08010-9_3},
  language  = {English},
  note      = {15th International Conference on Web-Age Information Management, WAIM 2014 ; Conference date: 16-06-2014 Through 18-06-2014},
  abstract  = {The key issue in top-k retrieval --- finding a set of k documents (from a large document collection) that can best answer a user's query --- is to strike the optimal balance between relevance and diversity. In this paper, we study the top-k retrieval problem in the framework of facility location analysis and prove the submodularity of that objective function which provides a theoretical approximation guarantee of factor $1 - 1/e$ for the (best-first) greedy search algorithm. Furthermore, we propose a two-stage hybrid search strategy which first obtains a high-quality initial set of top-k documents via greedy search, and then refines that result set iteratively via local search. Experiments on two large TREC benchmark datasets show that our two-stage hybrid search strategy approach outperforms the existing ones.},
}