% Conference paper (Liu, Liu, Cheng, Lu; 2019) on the UHRP pruning method, published in the
% DASFAA 2019 International Workshops proceedings. The citation key is an opaque export hash;
% it is kept unchanged so existing citations continue to resolve.
% Review notes: language/address were translated from Chinese-localized export values;
% address is only country-level (no publisher city is recorded here -- TODO confirm);
% the DOI is stored bare (no LaTeX escaping); no series volume number is recorded.
@inproceedings{897801ef8e3d4b4e936e7ca74b5f4e85,
  author    = {Liu, Kun and Liu, Wenyan and Cheng, Junhong and Lu, Xingjian},
  title     = {{UHRP}: Uncertainty-Based Pruning Method for Anonymized Data Linear Regression},
  booktitle = {Database Systems for Advanced Applications - {DASFAA} 2019 International Workshops},
  editor    = {Li, Guoliang and Natwichai, Juggapong and Gama, Joao and Tong, Yongxin and Yang, Jun},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2019},
  pages     = {19--33},
  doi       = {10.1007/978-3-030-18590-9_2},
  isbn      = {9783030185893},
  language  = {English},
  keywords  = {Anonymization, Interval value, Machine learning},
  abstract  = {Anonymization method, as a kind of privacy protection technology for data publishing, has been heavily researched during the past twenty years. However, fewer researches have been conducted on making better use of the anonymized data for data mining. In this paper, we focus on training regression model using anonymized data and predicting on original samples using the trained model. Anonymized training instances are generally considered as hyper-rectangles, which is different from most machine learning tasks. We propose several hyper-rectangle vectorization methods that are compatible with both anonymized data and original data for model training. Anonymization brings additional uncertainty. To address this issue, we propose an Uncertainty-based Hyper-Rectangle Pruning method (UHRP) to reduce the disturbance introduced by anonymized data. In this method, we prune hyper-rectangle by its global uncertainty which is calculated from all uncertain attributes. Experiments show that a linear regressor trained on anonymized data could be expected to do as well as the model trained with original data under specific conditions. Experimental results also prove that our pruning method could further improve the model{\textquoteright}s performance.},
  note      = {Publisher Copyright: {\textcopyright} 2019, Springer Nature Switzerland AG.; 24th International Conference on Database Systems for Advanced Applications, DASFAA 2019 ; Conference date: 22-04-2019 Through 25-04-2019},
}