@comment{Conference paper in the DASFAA 2016 proceedings (Lecture Notes in Computer Science). All person names use the "Last, First" form. NOTE(review): the address field holds a country, not a publisher city -- confirm the publisher location before relying on it.}
@inproceedings{f52b2e1f6d35416e91e80b589bf9475b,
  author    = {Kong, Chao and Gao, Ming and Xu, Chen and Qian, Weining and Zhou, Aoying},
  title     = {Entity matching across multiple heterogeneous data sources},
  editor    = {Navathe, Shamkant B. and Wu, Weili and Shekhar, Shashi and Du, Xiaoyong and Xiong, Hui and Wang, X. Sean},
  booktitle = {Database Systems for Advanced Applications - 21st International Conference, {DASFAA} 2016, Proceedings},
  series    = {Lecture Notes in Computer Science},
  pages     = {133--146},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2016},
  isbn      = {9783319320243},
  doi       = {10.1007/978-3-319-32025-0_9},
  language  = {English},
  keywords  = {Entity matching, Exponential family, Locality sensitive hashing},
  abstract  = {Entity matching is the problem of identifying which entities in a data source refer to the same real-world entity in the others. Identifying entities across heterogeneous data sources is paramount to entity profiling, product recommendation, etc. The matching process is not only overwhelmingly expensive for large data sources since it involves all tuples from two or more data sources, but also need to handle heterogeneous entity attributes. In this paper, we design an unsupervised approach, called EMAN, to match entities across two or more heterogeneous data sources. The algorithm utilizes the locality sensitive hashing schema to reduce the candidate tuples and speed up the matching process. To handle the heterogeneous entity attributes, we employ the exponential family to model the similarities between the different attributes. EMAN is highly accurate and efficient even without any ground-truth tuples. We illustrate the performance of EMAN on re-identifying entities from the same data source, as well as matching entities across three real data sources. Our experimental results manifest that our proposed approach outperforms the comparable baseline.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2016.; 21st International Conference on Database Systems for Advanced Applications, DASFAA 2016 ; Conference date: 16-04-2016 Through 19-04-2016},
}