• Reasoning Models Know When They're Right: Probing Hidden States for Self-Verification.
    Anqi Zhang, Yulin Chen, Jane Pan, Chen Zhao, Aurojit Panda, Jinyang Li and He He. arXiv preprint arXiv:2504.05419, 2025. [bib]
    @article{zhang2025reasoning,
            author={Anqi Zhang and Yulin Chen and Jane Pan and Chen Zhao and Aurojit Panda and Jinyang Li and He He},
            title={Reasoning Models Know When They're Right: Probing Hidden States for Self-Verification},
            journal={arXiv:2504.05419},
            year={2025}
    }
    
  • Transformers Struggle to Learn to Search.
    Abulhair Saparov, Srushti Pawar, Shreyas Pimpalgaonkar, Nitish Joshi, Richard Yuanzhe Pang, Vishakh Padmakumar, Seyed Mehran Kazemi, Najoung Kim* and He He*. International Conference on Learning Representations (ICLR), 2025. [bib]
    @inproceedings{saparov2025search,
            author={Abulhair Saparov and Srushti Pawar and Shreyas Pimpalgaonkar and Nitish Joshi and Richard Yuanzhe Pang and Vishakh Padmakumar and Seyed Mehran Kazemi and Najoung Kim and He He},
            title={Transformers Struggle to Learn to Search},
            booktitle={International Conference on Learning Representations (ICLR)},
            year={2025}
    }
    
  • Language Models Learn to Mislead Humans via RLHF.
    Jiaxin Wen, Ruiqi Zhong, Akbir Khan, Ethan Perez, Jacob Steinhardt, Minlie Huang, Sam Bowman, He He and Shi Feng. International Conference on Learning Representations (ICLR), 2025. [bib]
    @inproceedings{wen2025language,
            author={Jiaxin Wen and Ruiqi Zhong and Akbir Khan and Ethan Perez and Jacob Steinhardt and Minlie Huang and Sam Bowman and He He and Shi Feng},
            title={Language Models Learn to Mislead Humans via RLHF},
            booktitle={International Conference on Learning Representations (ICLR)},
            year={2025}
    }
    
  • Parallel Structures in Pre-training Data Yield In-Context Learning.
    Yanda Chen, Chen Zhao, Zhou Yu, Kathleen McKeown and He He. Association for Computational Linguistics (ACL), 2024. [bib] [code]
    @inproceedings{chen2024parallel,
            author={Yanda Chen and Chen Zhao and Zhou Yu and Kathleen McKeown and He He},
            title={Parallel Structures in Pre-training Data Yield In-Context Learning},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2024}
    }
    
  • Solving Olympiad Geometry without Human Demonstrations.
    Trieu Trinh, Yuhuai Wu, Quoc V Le, He He and Thang Luong. Nature, 2024. [bib]
    @article{trinh2024geometry,
            author={Trieu Trinh and Yuhuai Wu and Quoc V Le and He He and Thang Luong},
            title={Solving Olympiad Geometry without Human Demonstrations},
            journal={Nature},
            volume={625},
            pages={476--482},
            year={2024}
    }
    
  • Personas as a Way to Model Truthfulness in Language Models.
    Nitish Joshi*, Javier Rando*, Abulhair Saparov, Najoung Kim and He He. Empirical Methods in Natural Language Processing (EMNLP), 2024. [bib]
    @inproceedings{joshi2024persona,
            author={Nitish Joshi and Javier Rando and Abulhair Saparov and Najoung Kim and He He},
            title={Personas as a Way to Model Truthfulness in Language Models},
            booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
            year={2024}
    }
    
  • Does Writing with Language Models Reduce Content Diversity?
    Vishakh Padmakumar and He He. International Conference on Learning Representations (ICLR), 2024. [bib] [code]
    @inproceedings{padmakumar2024writing,
            author={Vishakh Padmakumar and He He},
            title={Does Writing with Language Models Reduce Content Diversity?},
            booktitle={International Conference on Learning Representations (ICLR)},
            year={2024}
    }
    
  • Do Models Explain Themselves? Counterfactual Simulatability of Natural Language Explanations.
    Yanda Chen, Ruiqi Zhong, Narutatsu Ri, Chen Zhao, He He, Jacob Steinhardt, Zhou Yu and Kathleen McKeown. International Conference on Machine Learning (ICML), 2024. Spotlight [bib] [code]
    @inproceedings{chen2024do,
            author={Yanda Chen and Ruiqi Zhong and Narutatsu Ri and Chen Zhao and He He and Jacob Steinhardt and Zhou Yu and Kathleen McKeown},
            title={Do Models Explain Themselves? Counterfactual Simulatability of Natural Language Explanations},
            booktitle={International Conference on Machine Learning (ICML)},
            year={2024}
    }
    
  • Testing the General Deductive Reasoning Capacity of Large Language Models Using OOD Examples.
    Abulhair Saparov, Richard Yuanzhe Pang, Vishakh Padmakumar, Nitish Joshi, Seyed Mehran Kazemi, Najoung Kim* and He He*. Neural Information Processing Systems (NeurIPS), 2023. [bib] [code]
    @inproceedings{saparov2023testing,
            author={Abulhair Saparov and Richard Yuanzhe Pang and Vishakh Padmakumar and Nitish Joshi and Seyed Mehran Kazemi and Najoung Kim and He He},
            title={Testing the General Deductive Reasoning Capacity of Large Language Models Using OOD Examples},
            booktitle={Neural Information Processing Systems (NeurIPS)},
            year={2023}
    }
    
  • Reward Gaming in Conditional Text Generation.
    Richard Yuanzhe Pang, Vishakh Padmakumar, Thibault Sellam, Ankur P Parikh and He He. Association for Computational Linguistics (ACL), 2023. [bib] [talk]
    @inproceedings{pang2023reward,
            author={Richard Yuanzhe Pang and Vishakh Padmakumar and Thibault Sellam and Ankur P Parikh and He He},
            title={Reward Gaming in Conditional Text Generation},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2023}
    }
    
  • Extrapolative Controlled Sequence Generation via Iterative Refinement.
    Vishakh Padmakumar, Richard Yuanzhe Pang, He He and Ankur P Parikh. International Conference on Machine Learning (ICML), 2023. [bib] [code]
    @inproceedings{padmakumar2023extrapolative,
            author={Vishakh Padmakumar and Richard Yuanzhe Pang and He He and Ankur P Parikh},
            title={Extrapolative Controlled Sequence Generation via Iterative Refinement},
            booktitle={International Conference on Machine Learning (ICML)},
            year={2023}
    }
    
  • Language Models are Greedy Reasoners: A Systematic Formal Analysis of Chain-of-Thought.
    Abulhair Saparov and He He. International Conference on Learning Representations (ICLR), 2023. [bib] [code] [talk]
    @inproceedings{saparov2023language,
            author={Abulhair Saparov and He He},
            title={Language Models are Greedy Reasoners: A Systematic Formal Analysis of Chain-of-Thought},
            booktitle={International Conference on Learning Representations (ICLR)},
            year={2023}
    }
    
  • Are All Spurious Features in Natural Language Alike? An Analysis through a Causal Lens.
    Nitish Joshi, Xiang Pan and He He. Empirical Methods in Natural Language Processing (EMNLP), 2022. [bib] [code] [talk]
    @inproceedings{joshi2022all,
            author={Nitish Joshi and Xiang Pan and He He},
            title={Are All Spurious Features in Natural Language Alike? An Analysis through a Causal Lens},
            booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
            year={2022}
    }
    
  • Meta-learning via Language Model In-context Tuning.
    Yanda Chen, Ruiqi Zhong, Sheng Zha, George Karypis and He He. Association for Computational Linguistics (ACL), 2022. [bib] [code]
    @inproceedings{chen2022meta,
            author={Yanda Chen and Ruiqi Zhong and Sheng Zha and George Karypis and He He},
            title={Meta-learning via Language Model In-context Tuning},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2022}
    }
    
  • An Investigation of the (In)effectiveness of Counterfactually Augmented Data.
    Nitish Joshi and He He. Association for Computational Linguistics (ACL), 2022. [bib] [code]
    @inproceedings{joshi2022investigation,
            author={Nitish Joshi and He He},
            title={An Investigation of the (In)effectiveness of Counterfactually Augmented Data},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2022}
    }
    
  • Text Generation by Learning from Demonstrations.
    Richard Yuanzhe Pang and He He. International Conference on Learning Representations (ICLR), 2021. [bib] [code] [talk]
    @inproceedings{pang2021text,
            author={Richard Yuanzhe Pang and He He},
            title={Text Generation by Learning from Demonstrations},
            booktitle={International Conference on Learning Representations (ICLR)},
            year={2021}
    }
    
  • An Empirical Study on Robustness to Spurious Correlations using Pre-trained Language Models.
    Lifu Tu, Garima Lalwani, Spandana Gella and He He. Transactions of the Association for Computational Linguistics (TACL), 2020. [bib] [code]
    @article{tu2020empirical,
            author={Lifu Tu and Garima Lalwani and Spandana Gella and He He},
            title={An Empirical Study on Robustness to Spurious Correlations using Pre-trained Language Models},
            journal={Transactions of the Association for Computational Linguistics (TACL)},
            volume={8},
            year={2020}
    }
    
  • FEQA: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization.
    Esin Durmus, He He and Mona Diab. Association for Computational Linguistics (ACL), 2020. [bib] [code] [talk]
    @inproceedings{durmus2020feqa,
            author={Esin Durmus and He He and Mona Diab},
            title={FEQA: A Question Answering Evaluation Framework for Faithfulness Assessment in Abstractive Summarization},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2020}
    }
    
  • Unlearn Dataset Bias for Natural Language Inference by Fitting the Residual.
    He He, Sheng Zha and Haohan Wang. EMNLP Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo), 2019. [bib] [code] [poster]
    @inproceedings{he2019unlearn,
            author={He He and Sheng Zha and Haohan Wang},
            title={Unlearn Dataset Bias for Natural Language Inference by Fitting the Residual},
            booktitle={EMNLP Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo)},
            year={2019}
    }
    
  • Pun Generation with Surprise.
    He He*, Nanyun Peng* and Percy Liang. North American Chapter of the Association for Computational Linguistics (NAACL), 2019. [bib] [code] [codalab]
    @inproceedings{he2019pun,
            author={He He and Nanyun Peng and Percy Liang},
            title={Pun Generation with Surprise},
            booktitle={North American Chapter of the Association for Computational Linguistics (NAACL)},
            year={2019}
    }
    
  • Decoupling Strategy and Generation in Negotiation Dialogues.
    He He, Derek Chen, Anusha Balakrishnan and Percy Liang. Empirical Methods in Natural Language Processing (EMNLP), 2018. [bib] [project]
    @inproceedings{he2018decouple,
            author={He He and Derek Chen and Anusha Balakrishnan and Percy Liang},
            title={Decoupling Strategy and Generation in Negotiation Dialogues},
            booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
            year={2018}
    }
    
  • QuAC: Question Answering in Context.
    Eunsol Choi*, He He*, Mohit Iyyer*, Mark Yatskar*, Wen-tau Yih, Yejin Choi, Percy Liang and Luke Zettlemoyer. Empirical Methods in Natural Language Processing (EMNLP), 2018. [bib] [project]
    @inproceedings{choi2018quac,
            author={Eunsol Choi and He He and Mohit Iyyer and Mark Yatskar and Wen-tau Yih and Yejin Choi and Percy Liang and Luke Zettlemoyer},
            title={QuAC: Question Answering in Context},
            booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
            year={2018}
    }
    
  • Sharp Nearby, Fuzzy Far Away: How Neural Language Models Use Context.
    Urvashi Khandelwal, He He, Peng Qi and Dan Jurafsky. Association for Computational Linguistics (ACL), 2018. [bib] [code]
    @inproceedings{khandelwal2018lm,
            author={Urvashi Khandelwal and He He and Peng Qi and Dan Jurafsky},
            title={Sharp Nearby, Fuzzy Far Away: How Neural Language Models Use Context},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2018}
    }
    
  • Delete, Retrieve, Generate: a Simple Approach to Sentiment and Style Transfer.
    Juncen Li, Robin Jia, He He and Percy Liang. North American Chapter of the Association for Computational Linguistics (NAACL), 2018. [bib] [code]
    @inproceedings{li2018style,
            author={Juncen Li and Robin Jia and He He and Percy Liang},
            title={Delete, Retrieve, Generate: a Simple Approach to Sentiment and Style Transfer},
            booktitle={North American Chapter of the Association for Computational Linguistics (NAACL)},
            year={2018}
    }
    
  • Learning Symmetric Collaborative Dialogue Agents with Dynamic Knowledge Graph Embeddings.
    He He, Anusha Balakrishnan, Mihail Eric and Percy Liang. Association for Computational Linguistics (ACL), 2017. [bib] [project]
    @inproceedings{he2017symmetric,
            author={He He and Anusha Balakrishnan and Mihail Eric and Percy Liang},
            title={Learning Symmetric Collaborative Dialogue Agents with Dynamic Knowledge Graph Embeddings},
            booktitle={Association for Computational Linguistics (ACL)},
            year={2017}
    }
    
  • Opponent Modeling in Deep Reinforcement Learning.
    He He, Jordan Boyd-Graber, Kevin Kwok and Hal Daumé III. International Conference on Machine Learning (ICML), 2016. [bib] [code] [data]
    @inproceedings{he2016opponent,
            author={He He and Jordan Boyd-Graber and Kevin Kwok and Hal {Daum\'{e} III}},
            title={Opponent Modeling in Deep Reinforcement Learning},
            booktitle={International Conference on Machine Learning (ICML)},
            year={2016}
    }
    
  • Learning to Search in Branch and Bound Algorithms.
    He He, Hal Daumé III and Jason Eisner. Neural Information Processing Systems (NeurIPS), 2014. [bib] [code] [poster]
    @inproceedings{he2014bb,
            author={He He and Hal {Daum\'{e} III} and Jason Eisner},
            title={Learning to Search in Branch and Bound Algorithms},
            booktitle={Neural Information Processing Systems (NeurIPS)},
            year={2014}
    }
    
  • Dynamic Feature Selection for Dependency Parsing.
    He He, Hal Daumé III and Jason Eisner. Empirical Methods in Natural Language Processing (EMNLP), 2013. [bib] [talk]
    @inproceedings{he2013dep,
            author={He He and Hal {Daum\'{e} III} and Jason Eisner},
            title={Dynamic Feature Selection for Dependency Parsing},
            booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
            year={2013}
    }
    
  • Imitation Learning by Coaching.
    He He, Hal Daumé III and Jason Eisner. Neural Information Processing Systems (NeurIPS), 2012. [bib] [poster]
    @inproceedings{he2012coaching,
            author={He He and Hal {Daum\'{e} III} and Jason Eisner},
            title={Imitation Learning by Coaching},
            booktitle={Neural Information Processing Systems (NeurIPS)},
            year={2012}
    }
    
  • Single Image Super-resolution using Gaussian Process Regression.
    He He and Wan-Chi Siu. Computer Vision and Pattern Recognition (CVPR), 2011. [bib] [code] [talk]
    @inproceedings{he2011superres,
            author={He He and Wan-Chi Siu},
            title={Single Image Super-resolution using Gaussian Process Regression},
            booktitle={Computer Vision and Pattern Recognition (CVPR)},
            year={2011}
    }