@comment{
  KanginPugeaultTech2018a: technical report by Kangin and Pugeault,
  institution University of Exeter, year 2018, with a handle URL.
  NOTE(review): the publisher field is not one of the fields the standard
  BibTeX styles read for a techreport entry, so its value is presumably
  never printed; it is kept unchanged here. If an arXiv identifier exists
  for this report, consider recording it via eprint / eprinttype instead
  -- TODO confirm the identifier before adding it.
}
@techreport{KanginPugeaultTech2018a,
  author      = {Kangin, Dmitry and Pugeault, Nicolas},
  title       = {On-Policy Trust Region Policy Optimisation with Replay Buffers},
  institution = {University of Exeter},
  publisher   = {arXiv},
  year        = {2018},
  url         = {http://hdl.handle.net/10871/35684},
}