% Conference paper (ICMEP 2014, per the note field) published in the book series
% "Applied Mechanics and Materials".
% - volume is taken from the DOI suffix (AMM.548-549.1102; 1102 matches the first page).
% - language and address were exported as Chinese text; they are stored here in
%   ASCII/English so the entry is safe for classic 8-bit BibTeX.
% NOTE(review): address holds a country, not the publisher's city -- confirm
% against the publisher record before relying on it.
@inproceedings{ba1218ef5a8f442584086fcc12ec1326,
  author    = {Li, Qiang and He, Liang and Lin, Xin},
  title     = {Improved relative term frequency probability feature selection for document categorization},
  booktitle = {Achievements in Engineering Sciences},
  series    = {Applied Mechanics and Materials},
  volume    = {548--549},
  pages     = {1102--1109},
  publisher = {Trans Tech Publications},
  address   = {Germany},
  year      = {2014},
  doi       = {10.4028/www.scientific.net/AMM.548-549.1102},
  isbn      = {9783038350842},
  language  = {english},
  keywords  = {Categorical distribution, Category tendency, Distribution probability, Term frequency, Variance mean},
  abstract  = {Feature selection is an important process to choose a subset of features relevant to a particular application in document classification. Those terms which occur unevenly in various categories have strong distinguishable information as to categorization. Firstly, based on the categorical document frequency probability (CTFP), a CTFP\_VM feature selection algorithm was designed for feature selection. Secondly, a maximum term frequency conditional distribution factor was proposed to improve the CTFP\_VM criterion further. We perform the document categorization experiments on SVM classifiers with the well-known Reuters-21578 and 20news-18828 corpuses as unbalanced and balanced corpus respectively. Experiments compare the novel methods with other conventional feature selection algorithms and the proposed method achieves the excellent feature set for document categorization.},
  note      = {3rd International Conference on Manufacturing Engineering and Process, ICMEP 2014 ; Conference date: 10-04-2014 Through 11-04-2014},
}