% Conference paper (2023); cite as an2023dppo.
@inproceedings{an2023dppo,
  author    = {Gaon An and Junhyeok Lee and Xingdong Zuo and Norio Kosaka and Kyung-Min Kim and Hyun Oh Song},
  title     = {Direct Preference-based Policy Optimization without Reward Modeling},
  booktitle = {Neural Information Processing Systems},
  year      = {2023},
}