% Conference paper in the ADC 2015 proceedings (Springer LNCS series).
% Acronyms/proper nouns in the title are brace-protected so that
% sentence-casing styles keep their capitalisation.
% NOTE(review): `address` holds a country, not the publisher's city, and the
% LNCS `volume` number is absent -- confirm both against the publisher record.
@inproceedings{501622f265b44ae8a85d7f0506836207,
title = "{SEMI}: A Scalable Entity Matching System Based on {MapReduce}",
abstract = "MapReduce framework provides a new platform for data integration on distributed environment. We demonstrate a MapReduce-based entity resolution framework which efficiently solves the matching problem for structured, semi-structured and unstructured entities. We propose a random-based data representation method for reducing network transmission; we implement our design on MapReduce and design two solutions for reducing redundant comparisons. Our demo provides an easy-to-use platform for entity matching and performance analysis. We also compare the performance of our algorithm with the state-of-the-art blocking-based methods.",
author = "Pingfu Chao and Yuming Li and Zhu Gao and Junhua Fang and Xiaofeng He and Rong Zhang",
note = "Publisher Copyright: {\textcopyright} Springer International Publishing Switzerland 2015.; 26th Australasian Database Conference, ADC 2015 ; Conference date: 04-06-2015 Through 07-06-2015",
year = "2015",
doi = "10.1007/978-3-319-19548-3_29",
language = "English",
isbn = "9783319195476",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Verlag",
pages = "328--332",
editor = "Cheema, {Muhammad Aamir} and Jianzhong Qi and Sharaf, {Mohamed A.}",
booktitle = "Databases Theory and Applications - 26th Australasian Database Conference, ADC 2015, Proceedings",
address = "Germany",
}