% ScreenAgent paper (Niu et al.), IJCAI-24 proceedings, pages 6433--6441.
% The month is given as the unquoted macro (aug) so each style can render it;
% the camelCase system name in the title is braced to survive sentence casing.
@inproceedings{ijcai2024p0711,
  author    = {Niu, Runliang and Li, Jindong and Wang, Shiqi and Fu, Yali and Hu, Xiyu and Leng, Xueyuan and Kong, He and Chang, Yi and Wang, Qi},
  title     = {{ScreenAgent}: A Vision Language Model-driven Computer Control Agent},
  booktitle = {Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, {IJCAI-24}},
  editor    = {Larson, Kate},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {6433--6441},
  year      = {2024},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2024/711},
  url       = {https://doi.org/10.24963/ijcai.2024/711},
}