% KPerfIR conference paper. The citation key is kept as-is so existing
% \cite commands keep working. Publication status lives in `note`, not in
% the title, and case-sensitive words in the title are brace-protected.
@inproceedings{kperir,
  author    = {Guan, Yue and Fang, Yuanwei and Zhou, Keren and Robeck, Corbin and Ren, Manman and Yu, Zhongkai and Ding, Yufei},
  title     = {{KPerfIR}: Towards an Open and Compiler-centric Ecosystem for {GPU} Kernel Performance Tooling on Modern {AI} Workloads},
  booktitle = {USENIX Symposium on Operating Systems Design and Implementation},
  year      = {2025},
  note      = {To appear},
}
MLsys’25
FastTree: Optimizing Attention Kernel and Runtime for Tree-Structured LLM Inference (To Appear)
% FastTree conference paper. The citation key (which says 2024) is kept
% unchanged for existing citations even though `year` is 2025. Publication
% status lives in `note`; case-sensitive title words are brace-protected.
@inproceedings{pan2024fasttree,
  author    = {Pan, Zaifeng and Ding, Yitong and Guan, Yue and Wang, Zheng and Yu, Zhongkai and Tang, Xulong and Wang, Yida and Ding, Yufei},
  title     = {{FastTree}: Optimizing Attention Kernel and Runtime for Tree-Structured {LLM} Inference},
  booktitle = {Proceedings of Machine Learning and Systems},
  year      = {2025},
  note      = {To appear},
}
2024
MICRO’24
Cambricon-LLM: A Chiplet-Based Hybrid Architecture for On-Device Inference of 70B LLM
Zhongkai Yu, Shengwen Liang, Tianyun Ma, Yunke Cai, Ziyuan Nan, Di Huang, Xinkai Song, Yifan Hao, Jie Zhang, Tian Zhi, Yongwei Zhao, Zidong Du, Xing Hu, Qi Guo, and Tianshi Chen
In Proceedings of the 57th IEEE/ACM International Symposium on Microarchitecture, 2024
% Cambricon-LLM conference paper. The system name, the model size and the
% acronym in the title are brace-protected so sentence-casing styles do not
% lowercase them.
@inproceedings{yu2024cambricon,
  author       = {Yu, Zhongkai and Liang, Shengwen and Ma, Tianyun and Cai, Yunke and Nan, Ziyuan and Huang, Di and Song, Xinkai and Hao, Yifan and Zhang, Jie and Zhi, Tian and Zhao, Yongwei and Du, Zidong and Hu, Xing and Guo, Qi and Chen, Tianshi},
  title        = {{Cambricon-LLM}: A Chiplet-Based Hybrid Architecture for On-Device Inference of {70B} {LLM}},
  booktitle    = {Proceedings of the 57th IEEE/ACM International Symposium on Microarchitecture},
  pages        = {1474--1488},
  year         = {2024},
  organization = {IEEE},
}
TACO
Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies
Zhuoran Song, Zhongkai Yu, Xinkai Song, Yifan Hao, Li Jiang, Naifeng Jing, and Xiaoyao Liang
ACM Transactions on Architecture and Code Optimization, 2024
% Journal article. The publisher name and the publisher's city are stored
% in separate fields (`publisher` / `address`) rather than fused into one
% string, so styles can format each part on its own.
@article{song2024environmental,
  author    = {Song, Zhuoran and Yu, Zhongkai and Song, Xinkai and Hao, Yifan and Jiang, Li and Jing, Naifeng and Liang, Xiaoyao},
  title     = {Environmental Condition Aware Super-Resolution Acceleration Framework in Server-Client Hierarchies},
  journal   = {ACM Transactions on Architecture and Code Optimization},
  volume    = {21},
  number    = {4},
  pages     = {1--26},
  year      = {2024},
  publisher = {ACM},
  address   = {New York, NY},
}
TCAD
Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression
% Journal article. The author list is truncated with the literal
% `and others` token, which the bibliography style renders as "et al.".
% No volume, number, or pages are recorded for this entry.
@article{ma2024harmonia,
  author    = {Ma, Tianyun and Wen, Yuanbo and Song, Xinkai and Jin, Pengwei and Huang, Di and Han, Husheng and Nan, Ziyuan and Yu, Zhongkai and Peng, Shaohui and Zhao, Yongwei and others},
  title     = {Harmonia: A Unified Architecture for Efficient Deep Symbolic Regression},
  journal   = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year      = {2024},
  publisher = {IEEE},
}
2022
DAC’22
E2sr: an end-to-end video codec assisted system for super resolution acceleration
Zhuoran Song, Zhongkai Yu, Naifeng Jing, and Xiaoyao Liang
In Proceedings of the 59th ACM/IEEE Design Automation Conference, 2022
% E2SR conference paper. The title is stored in Title Case with the
% acronym brace-protected; styles can downcase a title but cannot restore
% capitals that were lost in export.
@inproceedings{song2022e2sr,
  author    = {Song, Zhuoran and Yu, Zhongkai and Jing, Naifeng and Liang, Xiaoyao},
  title     = {{E2SR}: An End-to-End Video Codec Assisted System for Super Resolution Acceleration},
  booktitle = {Proceedings of the 59th ACM/IEEE Design Automation Conference},
  pages     = {229--234},
  year      = {2022},
}