Workshop paper in the WISE 2010 Workshops revised selected papers (series: Lecture Notes in Computer Science), published 2011.
The DOI is stored bare (no LaTeX escapes); author names use the unambiguous "Last, First" form.

@inproceedings{3a39f53a5b474f05b3707ce5dae006ce,
  author    = {Zhang, Jingwei and Zhang, Can and Qian, Weining and Zhou, Aoying},
  title     = {Automatic extraction rules generation based on {XPath} pattern learning},
  booktitle = {Web Information Systems Engineering - WISE 2010 Workshops - WISE 2010 International Symposium WISS and International Workshops CISE, MBC, Revised Selected Papers},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  pages     = {58--69},
  year      = {2011},
  doi       = {10.1007/978-3-642-24396-7_6},
  isbn      = {9783642243950},
  language  = {English},
  keywords  = {Web forum, data extraction, mapping rule},
  abstract  = {Web forums have become important information sources on the Web due to their rich content contributed by millions of Internet users every day. Data extraction from Web pages is a key but cumbersome step for data analysis because of significant human intervention. Web forums have fairly regular structures which allow us to generate extraction rules automatically according to their paths. In this paper, we introduce formal expressions for XPath patterns and pattern mapping rules, and advise machine learning methods to generate extraction rules for automatic data extraction from Web forums. The experimental results on real-life Web forums show good feasibility and accuracy for forum data.},
  note      = {Workshops on Web Information Systems Engineering, WISE 2010: 1st International Symposium on Web Intelligent Systems and Services, WISS 2010, 2nd International Workshop on Mobile Business Collaboration, MBC 2010 and 1st Int. Workshop on CISE 2010 ; Conference date: 12-12-2010 Through 14-12-2010},
}