% Conference paper from ICMLC 2005 describing the "SVM-pattern" method for
% protein remote homology detection. The citation key is the exporter's
% record identifier; the conference dates are recorded in the note field.
% Multi-word given names are brace-grouped so they are handled as one unit.
@inproceedings{db08b18909c347f0a0e5176024527b13,
  author    = {Dong, {Qi Wen} and Lin, Lei and Wang, {Xiao Long} and Li, {Ming Hui}},
  title     = {A pattern-based {SVM} for protein remote homology detection},
  booktitle = {2005 International Conference on Machine Learning and Cybernetics, ICMLC 2005},
  year      = {2005},
  pages     = {3363--3368},
  isbn      = {078039092X},
  language  = {English},
  keywords  = {Pattern, Protein, Remote homology, Text categorization},
  abstract  = {One key element in understanding the molecular machinery of the cell is to understand the structure and function of each protein encoded in the genome. A very successful means of inferring the structure or function of a previously un-annotated protein is via sequence homology with one or more protein whose structure or function is already known. In this paper, a novel method for protein remote homology detection has been presented. The technologies of text categorization from natural language processing have been used in protein classification. Patterns are discovered by TEIRESIAS algorithm and can be viewed as the ``words'' of ``protein sequence language''. The patterns are then filtered by an efficient feature selection algorithm called chi-square algorithm. Each protein sequence is mapped into a high dimensional vector by the occurrence times of the selected patterns. This presentation, combined with a discriminative classification algorithm known as the Support Vector Machine (SVM), provides a powerful means for protein remote homology detection. The method, called SVM-pattern, is tested on the SCOP database and compared with other state-of-the-art methods. The performance of SVM-pattern is better than that of BLAST method and comparable with other SVM-based methods such as SVM-k-spectrum and SVM-pairwise.},
  note      = {International Conference on Machine Learning and Cybernetics, ICMLC 2005 ; Conference date: 18-08-2005 Through 21-08-2005},
}