Publications | Zhongkai Yu

2026

ArXiv

ChipBench: A Next-Step Benchmark for Evaluating LLM Performance in AI-Aided Chip Design

Zhongkai Yu, Chenyang Zhou, Yichen Lin, Hejia Zhang, Haotian Ye, Junxia Cui, Zaifeng Pan, Jishen Zhao, and Yufei Ding

2026

% Preprint entry. The coined name and the acronyms in the title are braced so
% that sentence-casing bibliography styles do not lowercase them.
@misc{yu2026chipbenchnextstepbenchmarkevaluating,
  title = {{ChipBench}: A Next-Step Benchmark for Evaluating {LLM} Performance in {AI}-Aided Chip Design},
  author = {Yu, Zhongkai and Zhou, Chenyang and Lin, Yichen and Zhang, Hejia and Ye, Haotian and Cui, Junxia and Pan, Zaifeng and Zhao, Jishen and Ding, Yufei},
  year = {2026},
}

2025

ArXiv

Orders in Chaos: Enhancing Large-Scale MoE LLM Serving with Data Movement Forecasting

Zhongkai Yu, Yue Guan, Zihao Yu, Chenyang Zhou, Zhengding Hu, Shuyi Pei, Yangwook Kang, Yufei Ding, and Po-An Tsai

arXiv preprint arXiv:2510.05497, 2025

Bib PDF

% arXiv preprint. The identifier is stored in eprint/archiveprefix instead of a
% made-up journal name, so eprint-aware styles can print and link it; the url
% field keeps the paper reachable under styles that ignore eprint data.
% Acronyms in the title are braced to survive sentence-casing styles.
@misc{yu2025orders,
  title = {Orders in Chaos: Enhancing Large-Scale {MoE} {LLM} Serving with Data Movement Forecasting},
  author = {Yu, Zhongkai and Guan, Yue and Yu, Zihao and Zhou, Chenyang and Hu, Zhengding and Pei, Shuyi and Kang, Yangwook and Ding, Yufei and Tsai, Po-An},
  year = {2025},
  eprint = {2510.05497},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/2510.05497},
}

OSDI’25

KPerfIR: Towards an Open and Compiler-centric Ecosystem for GPU Kernel Performance Tooling on Modern AI Workloads

Yue Guan, Yuanwei Fang, Keren Zhou, Corbin Robeck, Manman Ren, Zhongkai Yu, and Yufei Ding

In USENIX Symposium on Operating Systems Design and Implementation, 2025

Bib PDF

% Conference paper. The coined name and acronyms in the title are braced so
% that sentence-casing bibliography styles keep their capitalization.
% NOTE(review): the key `kperir` looks like a typo of `kperfir`; it is kept
% unchanged so that existing \cite commands still resolve.
@inproceedings{kperir,
  title = {{KPerfIR}: Towards an Open and Compiler-centric Ecosystem for {GPU} Kernel Performance Tooling on Modern {AI} Workloads},
  author = {Guan, Yue and Fang, Yuanwei and Zhou, Keren and Robeck, Corbin and Ren, Manman and Yu, Zhongkai and Ding, Yufei},
  booktitle = {USENIX Symposium on Operating Systems Design and Implementation},
  year = {2025},
}

MLSys’25

FastTree: Optimizing Attention Kernel and Runtime for Tree-Structured LLM Inference (To Appear)

Zaifeng Pan, Yitong Ding, Yue Guan, Zheng Wang, Zhongkai Yu, Xulong Tang, Yida Wang, and Yufei Ding

In Proceedings of Machine Learning and Systems, 2025

Bib

% Conference paper. The publication status is carried in `note` rather than
% inside the title, so the title field holds only the paper's actual title.
% The coined name and acronym are braced to survive sentence-casing styles.
@inproceedings{pan2024fasttree,
  title = {{FastTree}: Optimizing Attention Kernel and Runtime for Tree-Structured {LLM} Inference},
  author = {Pan, Zaifeng and Ding, Yitong and Guan, Yue and Wang, Zheng and Yu, Zhongkai and Tang, Xulong and Wang, Yida and Ding, Yufei},
  year = {2025},
  booktitle = {Proceedings of Machine Learning and Systems},
  note = {To appear},
}

2024

MICRO’24

Cambricon-LLM: A Chiplet-Based Hybrid Architecture for On-Device Inference of 70B LLM

Zhongkai Yu, Shengwen Liang, Tianyun Ma, Yunke Cai, Ziyuan Nan, Di Huang, Xinkai Song, Yifan Hao, Jie Zhang, Tian Zhi, Yongwei Zhao, Zidong Du, Xing Hu, Qi Guo, and Tianshi Chen

In Proceedings of the 57th IEEE/ACM International Symposium on Microarchitecture, 2024

Bib PDF

% Conference paper. The system name, model size and acronym in the title are
% braced so that sentence-casing bibliography styles do not lowercase them.
% IEEE is recorded as the publisher of the proceedings.
@inproceedings{yu2024cambricon,
  title = {{Cambricon-LLM}: A Chiplet-Based Hybrid Architecture for On-Device Inference of {70B} {LLM}},
  author = {Yu, Zhongkai and Liang, Shengwen and Ma, Tianyun and Cai, Yunke and Nan, Ziyuan and Huang, Di and Song, Xinkai and Hao, Yifan and Zhang, Jie and Zhi, Tian and Zhao, Yongwei and Du, Zidong and Hu, Xing and Guo, Qi and Chen, Tianshi},
  booktitle = {Proceedings of the 57th IEEE/ACM International Symposium on Microarchitecture},
  pages = {1474--1488},
  year = {2024},
  publisher = {IEEE},
}

TACO

Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies

Zhuoran Song, Zhongkai Yu, Xinkai Song, Yifan Hao, Li Jiang, Naifeng Jing, and Xiaoyao Liang

ACM Transactions on Architecture and Code Optimization, 2024

Bib PDF

% Journal article. The publisher name and the publisher's city are stored in
% separate fields (`publisher`, `address`) so each style can format them.
@article{song2024environmental,
  title = {Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies},
  author = {Song, Zhuoran and Yu, Zhongkai and Song, Xinkai and Hao, Yifan and Jiang, Li and Jing, Naifeng and Liang, Xiaoyao},
  journal = {ACM Transactions on Architecture and Code Optimization},
  volume = {21},
  number = {4},
  pages = {1--26},
  year = {2024},
  publisher = {ACM},
  address = {New York, NY},
}

TCAD

Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression

Tianyun Ma, Yuanbo Wen, Xinkai Song, Pengwei Jin, Di Huang, Husheng Han, Ziyuan Nan, Zhongkai Yu, Shaohui Peng, Yongwei Zhao, and others

IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems, 2024

Bib

% Journal article. The author list is truncated with the literal `and others`
% token, which bibliography styles render as "et al.".
@article{ma2024harmonia,
  author    = {Ma, Tianyun and Wen, Yuanbo and Song, Xinkai and Jin, Pengwei and Huang, Di and Han, Husheng and Nan, Ziyuan and Yu, Zhongkai and Peng, Shaohui and Zhao, Yongwei and others},
  title     = {Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression},
  journal   = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  publisher = {IEEE},
  year      = {2024},
}

2022

DAC’22

E2SR: An End-to-End Video Codec Assisted System for Super Resolution Acceleration

Zhuoran Song, Zhongkai Yu, Naifeng Jing, and Xiaoyao Liang

In Proceedings of the 59th ACM/IEEE Design Automation Conference, 2022

Bib PDF

% Conference paper. The title is stored in Title Case with the system acronym
% braced; styles can downcase a title but cannot restore lost capitals.
% NOTE(review): the exact capitalization (E2SR, Codec) is assumed -- confirm
% against the publisher's record.
@inproceedings{song2022e2sr,
  title = {{E2SR}: An End-to-End Video Codec Assisted System for Super Resolution Acceleration},
  author = {Song, Zhuoran and Yu, Zhongkai and Jing, Naifeng and Liang, Xiaoyao},
  booktitle = {Proceedings of the 59th ACM/IEEE Design Automation Conference},
  pages = {229--234},
  year = {2022},
}