@misc{yu2026chipmate,
  author        = {Yu, Zhongkai and Lin, Yichen and Zhou, Chenyang and Zhang, Yuwei and Zhou, Kun and Cui, Junxia and Ye, Haotian and Hu, Zhengding and Pan, Zaifeng and Wang, Ruiyi and Zhao, Yujie and Zhang, Hejia and Shang, Jingbo and Zhao, Jishen and Ding, Yufei},
  title         = {{ChipMATE}: Multi-Agent Training via Reinforcement Learning for Enhanced {RTL} Generation},
  year          = {2026},
  eprint        = {2605.12857},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2605.12857},
}
ArXiv
ChipBench: A Next-Step Benchmark for Evaluating LLM Performance in AI-Aided Chip Design
Zhongkai Yu*, Chenyang Zhou*, Yichen Lin, Hejia Zhang, Haotian Ye, Junxia Cui, Zaifeng Pan, Jishen Zhao, and Yufei Ding
@misc{yu2026chipbenchnextstepbenchmarkevaluating,
  author        = {Yu, Zhongkai and Zhou, Chenyang and Lin, Yichen and Zhang, Hejia and Ye, Haotian and Cui, Junxia and Pan, Zaifeng and Zhao, Jishen and Ding, Yufei},
  title         = {{ChipBench}: A Next-Step Benchmark for Evaluating {LLM} Performance in {AI}-Aided Chip Design},
  year          = {2026},
  eprint        = {2601.21448},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2601.21448},
}
ArXiv
AMMA: A Multi-Chiplet Memory-Centric Architecture for Low-Latency 1M Context Attention Serving
Zhongkai Yu, Haotian Ye, Chenyang Zhou, Ohm Rishabh Venkatachalam, Zaifeng Pan, Zhengding Hu, Junsung Kim, Won Woo Ro, Po-An Tsai, Shuyi Pei, Yangwook Kang, and Yufei Ding
@misc{yu2026ammamultichipletmemorycentricarchitecture,
  author        = {Yu, Zhongkai and Ye, Haotian and Zhou, Chenyang and Venkatachalam, Ohm Rishabh and Pan, Zaifeng and Hu, Zhengding and Kim, Junsung and Ro, Won Woo and Tsai, Po-An and Pei, Shuyi and Kang, Yangwook and Ding, Yufei},
  title         = {{AMMA}: A Multi-Chiplet Memory-Centric Architecture for Low-Latency {1M} Context Attention Serving},
  year          = {2026},
  eprint        = {2604.26103},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2604.26103},
}
ISCA’26
Patterns behind Chaos: Forecasting Data Movement for Efficient Large-Scale MoE LLM Inference
@comment{NOTE(review): the page lists this paper under the ISCA'26 venue label, but the entry below cites the arXiv preprint. Presumably it should become an inproceedings entry once the proceedings metadata is known -- TODO confirm.}
@misc{yu2026patternschaosforecastingdata,
  author        = {Yu, Zhongkai and Guan, Yue and Yu, Zihao and Zhou, Chenyang and Hu, Zhengding and Pei, Shuyi and Kang, Yangwook and Ding, Yufei and Tsai, Po-An},
  title         = {Patterns behind Chaos: Forecasting Data Movement for Efficient Large-Scale {MoE} {LLM} Inference},
  year          = {2026},
  eprint        = {2510.05497},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2510.05497},
}
ArXiv
ScaleSim: Serving Large-Scale Multi-Agent Simulation with Invocation Distance-Based Memory Management
@misc{pan2026scalesim,
  author        = {Pan, Zaifeng and Shen, Yipeng and Hu, Zhengding and Wang, Zhuang and Manocha, Aninda and Wang, Zheng and Yu, Zhongkai and Guan, Yue and Ding, Yufei},
  title         = {{ScaleSim}: Serving Large-Scale Multi-Agent Simulation with Invocation Distance-Based Memory Management},
  year          = {2026},
  eprint        = {2601.21473},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2601.21473},
}
ArXiv
Pancake: Hierarchical Memory System for Multi-Agent LLM Serving
Zhengding Hu, Zaifeng Pan, Prabhleen Kaur, Vibha Murthy, Zhongkai Yu, Yue Guan, Zhen Wang, Steven Swanson, and Yufei Ding
@misc{hu2026pancake,
  author        = {Hu, Zhengding and Pan, Zaifeng and Kaur, Prabhleen and Murthy, Vibha and Yu, Zhongkai and Guan, Yue and Wang, Zhen and Swanson, Steven and Ding, Yufei},
  title         = {Pancake: Hierarchical Memory System for Multi-Agent {LLM} Serving},
  year          = {2026},
  eprint        = {2602.21477},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2602.21477},
}
OpenReview
TritonGym: A Benchmark for Agentic LLM Workflows in Triton GPU Code Generation
Yue Guan, Yichen Lin, Xu Zhao, Jianzhu Yao, Xinwei Qiang, Zhongkai Yu, Pramod Viswanath, Yufei Ding, and Adnan Aziz
@misc{guan2026tritongym,
  author       = {Guan, Yue and Lin, Yichen and Zhao, Xu and Yao, Jianzhu and Qiang, Xinwei and Yu, Zhongkai and Viswanath, Pramod and Ding, Yufei and Aziz, Adnan},
  title        = {{TritonGym}: A Benchmark for Agentic {LLM} Workflows in {Triton} {GPU} Code Generation},
  howpublished = {OpenReview preprint},
  year         = {2026},
}
ArXiv
JigsawRL: Assembling RL Pipelines for Efficient LLM Post-Training
Zhengding Hu, Hehua Ouyang, Chang Chen, Zaifeng Pan, Yue Guan, Zhongkai Yu, Zhen Wang, Steven Swanson, and Yufei Ding
@misc{hu2026jigsawrl,
  author        = {Hu, Zhengding and Ouyang, Hehua and Chen, Chang and Pan, Zaifeng and Guan, Yue and Yu, Zhongkai and Wang, Zhen and Swanson, Steven and Ding, Yufei},
  title         = {{JigsawRL}: Assembling {RL} Pipelines for Efficient {LLM} Post-Training},
  year          = {2026},
  eprint        = {2604.23838},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2604.23838},
}
2025
OSDI’25
KPerfIR: Towards an Open and Compiler-centric Ecosystem for GPU Kernel Performance Tooling on Modern AI Workloads
Yue Guan, Yuanwei Fang, Keren Zhou, Corbin Robeck, Manman Ren, Zhongkai Yu, Yufei Ding, and Adnan Aziz
In 19th USENIX Symposium on Operating Systems Design and Implementation, 2025
@inproceedings{kperir,
  author    = {Guan, Yue and Fang, Yuanwei and Zhou, Keren and Robeck, Corbin and Ren, Manman and Yu, Zhongkai and Ding, Yufei and Aziz, Adnan},
  title     = {{KPerfIR}: Towards an Open and Compiler-centric Ecosystem for {GPU} Kernel Performance Tooling on Modern {AI} Workloads},
  booktitle = {19th {USENIX} Symposium on Operating Systems Design and Implementation},
  year      = {2025},
}
TCAD
DomSim: Hardware-Aware Hybrid Fault Simulation with Dominator Tree-guided Partitioning
Mingjun Wang, Hui Wang, Feng Gu, Zizhen Liu, Jianan Mu, Shengwen Liang, Zhongkai Yu, Zheng Liang, Jun Gao, Jiaping Tang, Jing Ye, Bei Yu, Xiaowei Li, and Huawei Li
IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2025
@article{wang2025domsim,
  author  = {Wang, Mingjun and Wang, Hui and Gu, Feng and Liu, Zizhen and Mu, Jianan and Liang, Shengwen and Yu, Zhongkai and Liang, Zheng and Gao, Jun and Tang, Jiaping and Ye, Jing and Yu, Bei and Li, Xiaowei and Li, Huawei},
  title   = {{DomSim}: Hardware-Aware Hybrid Fault Simulation with Dominator Tree-guided Partitioning},
  journal = {{IEEE} Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year    = {2025},
}
MLSys’25
FastTree: Optimizing Attention Kernel and Runtime for Tree-Structured LLM Inference
@inproceedings{pan2024fasttree,
  author    = {Pan, Zaifeng and Ding, Yitong and Guan, Yue and Wang, Zheng and Yu, Zhongkai and Tang, Xulong and Wang, Yida and Ding, Yufei},
  title     = {{FastTree}: Optimizing Attention Kernel and Runtime for Tree-Structured {LLM} Inference},
  booktitle = {Proceedings of Machine Learning and Systems},
  volume    = {7},
  year      = {2025},
}
2024
MICRO’24
Cambricon-LLM: A Chiplet-Based Hybrid Architecture for On-Device Inference of 70B LLM
Zhongkai Yu*, Shengwen Liang*, Tianyun Ma, Yunke Cai, Ziyuan Nan, Di Huang, Xinkai Song, Yifan Hao, Jie Zhang, Tian Zhi, Yongwei Zhao, Zidong Du, Xing Hu, Qi Guo, and Tianshi Chen
In Proceedings of the 57th IEEE/ACM International Symposium on Microarchitecture, 2024
@inproceedings{yu2024cambricon,
  author    = {Yu, Zhongkai and Liang, Shengwen and Ma, Tianyun and Cai, Yunke and Nan, Ziyuan and Huang, Di and Song, Xinkai and Hao, Yifan and Zhang, Jie and Zhi, Tian and Zhao, Yongwei and Du, Zidong and Hu, Xing and Guo, Qi and Chen, Tianshi},
  title     = {{Cambricon-LLM}: A Chiplet-Based Hybrid Architecture for On-Device Inference of {70B} {LLM}},
  booktitle = {Proceedings of the 57th {IEEE/ACM} International Symposium on Microarchitecture},
  pages     = {1474--1488},
  publisher = {IEEE},
  year      = {2024},
}
TACO
Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies
Zhuoran Song*, Zhongkai Yu*, Xinkai Song, Yifan Hao, Li Jiang, Naifeng Jing, and Xiaoyao Liang
ACM Transactions on Architecture and Code Optimization, 2024
@article{song2024environmental,
  author    = {Song, Zhuoran and Yu, Zhongkai and Song, Xinkai and Hao, Yifan and Jiang, Li and Jing, Naifeng and Liang, Xiaoyao},
  title     = {Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies},
  journal   = {{ACM} Transactions on Architecture and Code Optimization},
  volume    = {21},
  number    = {4},
  pages     = {1--26},
  publisher = {ACM},
  address   = {New York, NY},
  year      = {2024},
}
TCAD
Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression
@article{ma2024harmonia,
  author    = {Ma, Tianyun and Wen, Yuanbo and Song, Xinkai and Jin, Pengwei and Huang, Di and Han, Husheng and Nan, Ziyuan and Yu, Zhongkai and Peng, Shaohui and Zhao, Yongwei and others},
  title     = {Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression},
  journal   = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  publisher = {IEEE},
  year      = {2024},
}
2022
DAC’22
E2sr: an end-to-end video codec assisted system for super resolution acceleration
Zhuoran Song, Zhongkai Yu, Naifeng Jing, and Xiaoyao Liang
In Proceedings of the 59th ACM/IEEE Design Automation Conference, 2022
@inproceedings{song2022e2sr,
  author    = {Song, Zhuoran and Yu, Zhongkai and Jing, Naifeng and Liang, Xiaoyao},
  title     = {{E2sr}: An End-to-End Video Codec Assisted System for Super Resolution Acceleration},
  booktitle = {Proceedings of the 59th {ACM/IEEE} Design Automation Conference},
  pages     = {229--234},
  year      = {2022},
}