% Conference paper: ARTIST, a Chinese text-to-image synthesizer (Findings of EMNLP 2022).
% Clean-up applied to the exported record:
%   - language/address were localized (Chinese) strings; stored here in ASCII English.
%   - booktitle carries the full proceedings title (it previously sat in series).
%   - export boilerplate from the old note field is kept verbatim in annote,
%     which the standard styles do not print.
%   - acronyms and proper nouns in the titles are brace-protected against recasing.
% NOTE(review): address only names a country, as exported; confirm the intended
%   location (publisher vs. conference venue) against the publisher's own record.
% NOTE(review): doi and pages were kept as exported -- TODO confirm they belong
%   to the same record.
@inproceedings{5e6c594595304ec9adc433651285379c,
  author    = {Liu, Tingting and Wang, Chengyu and Zhu, Xiangru and Li, Lei and Qiu, Minghui and Huang, Jun and Gao, Ming and Xiao, Yanghua},
  title     = {{ARTIST}: A Transformer-based {Chinese} Text-to-Image Synthesizer Digesting Linguistic and World Knowledge},
  booktitle = {Findings of the Association for Computational Linguistics: {EMNLP} 2022},
  editor    = {Goldberg, Yoav and Kozareva, Zornitsa and Zhang, Yue},
  year      = {2022},
  pages     = {881--888},
  publisher = {Association for Computational Linguistics (ACL)},
  address   = {Australia},
  doi       = {10.18653/v1/2022.findings-emnlp.527},
  language  = {English},
  abstract  = {Text-to-Image Synthesis (TIS) is a popular task to convert natural language texts into realistic images. Recently, transformer-based TIS models (such as DALL-E) have been proposed using the encoder-decoder architectures. Yet, these billion-scale TIS models are difficult to tune and deploy in resource-constrained environments. In addition, there is a lack of language-specific TIS benchmarks for Chinese, together with high-performing models with moderate sizes. In this work, we present ARTIST, A tRansformer-based Chinese Text-to-Image SynThesizer for high-quality image generation. In ARTIST, the rich linguistic and relational knowledge facts are injected into the model to ensure better model performance without the usage of ultra-large models. We further establish a large-scale Chinese TIS benchmark with the re-production results of state-of-the-art transformer-based TIS models. Results show ARTIST outperforms previous approaches.},
  annote    = {Publisher Copyright: {\textcopyright} 2022 Association for Computational Linguistics.; 2022 Findings of the Association for Computational Linguistics: EMNLP 2022 ; Conference date: 07-12-2022 Through 11-12-2022},
}