% Conference paper in the IJCNN 2005 proceedings comparing term weighting
% schemes for text categorization.
% The citation key is left unchanged so that existing citations keep resolving.
% Author names use the "Last, First" form with no escaped braces; the abstract
% has its extraction-garbled tokens (SVM, tf.rf, tf.chi, McNemar) restored.
@inproceedings{84878c1f5b9742f493360d822c359e6c,
  author    = {Lan, Man and Sung, Sam Yuan and Low, Hwee Boon and Tan, Chew Lim},
  title     = {A comparative study on term weighting schemes for text categorization},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks, {IJCNN} 2005},
  series    = {Proceedings of the International Joint Conference on Neural Networks},
  pages     = {546--551},
  year      = {2005},
  doi       = {10.1109/IJCNN.2005.1555890},
  isbn      = {0780390482},
  language  = {english},
  note      = {International Joint Conference on Neural Networks, IJCNN 2005 ; Conference date: 31-07-2005 Through 04-08-2005},
  abstract  = {The term weighting scheme, which is used to convert documents into vectors in the term spaces, is a vital step in automatic text categorization. The previous studies showed that term weighting schemes dominate the performance rather than the kernel functions of SVMs for the text categorization task. In this paper, we conducted experiments to compare various term weighting schemes with SVM on two widely-used benchmark data sets. We also presented a new term weighting scheme tf.rf for text categorization. The cross-scheme comparison was performed by using McNemar's Tests. The controlled experimental results showed that the newly proposed tf.rf scheme is significantly better than other term weighting schemes. Compared with schemes related with tf factor alone, the idf factor does not improve or even decrease the term's discriminating power for text categorization. The binary and tf.chi representations significantly underperform the other term weighting schemes.},
}