% Conference paper from the COMPSAC 2022 proceedings (auto-exported entry, cleaned up).
% Cleanup notes:
%   - editor: escaped braces removed; names are in "Last, First" form and keep the
%     surname/given-name split of the export (NOTE(review): confirm that
%     "Va Leong" is really the family name).
%   - abstract/keywords: line-break hyphenation artifacts removed.
%   - language/address: localized strings replaced by their English equivalents.
%     NOTE(review): address is a country here; the publisher's city would be
%     the conventional value -- confirm and refine.
%   - series: dropped because it was identical to booktitle.
@inproceedings{77f4b22489734feb86b97840d95db7b4,
  author    = {Wang, Yangyang and Hua, Liping and Zhao, Hui and Yang, Lingfeng},
  title     = {Unsupervised {Markdown} Feature-Aware Keywords Extraction Towards Technology Blogs},
  booktitle = {Proceedings - 2022 {IEEE} 46th Annual Computers, Software, and Applications Conference, {COMPSAC} 2022},
  editor    = {Va Leong, Hong and Sarvestani, Sahra Sedigh and Teranishi, Yuuichi and Cuzzocrea, Alfredo and Kashiwazaki, Hiroki and Towey, Dave and Yang, Ji-Jiang and Shahriar, Hossain},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {223--228},
  year      = {2022},
  doi       = {10.1109/COMPSAC54236.2022.00039},
  language  = {English},
  keywords  = {Extraction, Markdown feature, TF-IDF, TextRank, Unsupervised Machine Learning},
  abstract  = {A vast amount of blogs are generated from online technology communities every day. Most of them are in Markdown format. The increase of Markdown documents has brought opportunities and challenges to many natural language processing tasks. Extracting keywords from technology blogs is of great value for discovering, retrieving, and sharing knowledge about technical blogs. The mainstream keyword extraction algorithms remain to use statistical characteristics of words to determine the keywords of a document, seldom considering the structure characteristics of the document that potentially express the semantic information. We argue that Markdown markup features as well as the textual content of the document are both concerned with the keywords extraction. In this paper, we propose a novel unsupervised Markdown markup features aware keywords extraction algorithm for technology blogs. The algorithm integrates Markdown markup syntax information with a blog text representation. Through experiments against TF-IDF, TextRank, and PositionRank algorithms on a real Markdown document dataset, our algorithm achieves higher performance with a substantial improvement when the number of keywords extracted is greater than 3.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 46th IEEE Annual Computers, Software, and Applications Conference, COMPSAC 2022 ; Conference date: 27-06-2022 Through 01-07-2022},
}