% Hong et al., "Why Only Text: Empowering Vision-and-Language Navigation with
% Multi-modal Prompts", IJCAI-24 (Main Track), pp. 839--847.
% The month is given as the standard unquoted macro `aug` so that each
% bibliography style can render it in its own format and language.
% The url duplicates the DOI resolver link and is retained for styles that
% print `url` but do not support the `doi` field.
@inproceedings{ijcai2024p0093,
  author    = {Hong, Haodong and Wang, Sen and Huang, Zi and Wu, Qi and Liu, Jiajun},
  title     = {Why Only Text: Empowering Vision-and-Language Navigation with Multi-modal Prompts},
  booktitle = {Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence, {IJCAI-24}},
  editor    = {Larson, Kate},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  pages     = {839--847},
  year      = {2024},
  month     = aug,
  note      = {Main Track},
  doi       = {10.24963/ijcai.2024/93},
  url       = {https://doi.org/10.24963/ijcai.2024/93},
}