% Conference paper in Advances in Neural Information Processing Systems, volume 33 (2020).
% Editor names are stored in "Last, Initials" form with a space between initials,
% so that name-abbreviating styles keep every initial (e.g. "M. F." rather than "M.").
@inproceedings{NEURIPS2020_d1e7b08b,
  author    = {Lorberbom, Guy and Maddison, Chris and Heess, Nicolas and Hazan, Tamir and Tarlow, Daniel},
  title     = {Direct Policy Gradients: Direct Optimization of Policies in Discrete Action Spaces},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Larochelle, H. and Ranzato, M. and Hadsell, R. and Balcan, M. F. and Lin, H.},
  volume    = {33},
  pages     = {18076--18086},
  publisher = {Curran Associates, Inc.},
  year      = {2020},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2020/file/d1e7b08bdb7783ed4fb10abe92c22ffd-Paper.pdf},
}