@inproceedings{wan2020orthogonalized,
title = "Orthogonalized {SGD} and nested architectures for anytime neural networks",
abstract = "We propose a novel variant of SGD customized for training network architectures that support anytime behavior: such networks produce a series of increasingly accurate outputs over time. Efficient architectural designs for these networks focus on re-using internal state; subnetworks must produce representations relevant both for immediate prediction and for refinement by subsequent network stages. We consider traditional branched networks as well as a new class of recursively nested networks. Our new optimizer, Orthogonalized SGD, dynamically re-balances task-specific gradients when training a multitask network. In the context of anytime architectures, this optimizer projects gradients from later outputs onto a parameter subspace that does not interfere with those from earlier outputs. Experiments demonstrate that training with Orthogonalized SGD significantly improves generalization accuracy of anytime networks.",
author = "Chengcheng Wan and Henry Hoffmann and Shan Lu and Michael Maire",
note = "Publisher Copyright: {\textcopyright} 2020 by the Authors. 37th International Conference on Machine Learning, ICML 2020; Conference date: 13-07-2020 through 18-07-2020",
year = "2020",
language = "English",
series = "37th International Conference on Machine Learning, ICML 2020",
publisher = "International Machine Learning Society (IMLS)",
pages = "9749--9759",
editor = "Hal {Daum{\'e} III} and Aarti Singh",
booktitle = "Proceedings of the 37th International Conference on Machine Learning, ICML 2020",
}
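
The projection step described in the abstract admits a compact sketch: compute per-output gradients in priority order (earliest anytime output first), Gram-Schmidt each later gradient against the earlier ones so refinement stages do not interfere with earlier predictions, and apply the summed result as an SGD update. The PyTorch code below is a minimal illustration of that idea under those assumptions; the function and variable names are hypothetical and this is not the authors' reference implementation. BibTeX ignores text outside entries, so this note and sketch do not affect parsing.

import torch

def orthogonalized_sgd_step(params, task_losses, lr=0.01):
    # task_losses are ordered from earliest to latest anytime output.
    # Flatten each task's gradient into a single vector.
    grads = []
    for loss in task_losses:
        g = torch.autograd.grad(loss, params, retain_graph=True)
        grads.append(torch.cat([gi.reshape(-1) for gi in g]))

    # Gram-Schmidt: remove from each later gradient the components
    # lying in the span of earlier (higher-priority) gradients.
    basis = []
    for g in grads:
        g_proj = g.clone()
        for b in basis:
            coeff = torch.dot(g_proj, b) / torch.dot(b, b).clamp_min(1e-12)
            g_proj = g_proj - coeff * b
        basis.append(g_proj)

    # Sum the orthogonalized gradients and take a plain SGD step.
    update = torch.stack(basis).sum(dim=0)
    offset = 0
    with torch.no_grad():
        for p in params:
            n = p.numel()
            p -= lr * update[offset:offset + n].view_as(p)
            offset += n

# Illustrative usage with two anytime outputs (names hypothetical):
#   losses = [criterion(early_out, y), criterion(late_out, y)]
#   orthogonalized_sgd_step(list(model.parameters()), losses, lr=0.1)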