Dohwan Ko¹*, Sihyeon Kim¹*, Yumin Suh², Vijay Kumar², Minseo Yoon¹, Manmohan Chandraker²,³, Hyunwoo J. Kim⁴
¹Korea University, ²NEC Labs America, ³UC San Diego, ⁴KAIST
Code will be available soon!
@misc{ko2025st,
  author        = {Ko, Dohwan and Kim, Sihyeon and Suh, Yumin and Kumar, Vijay and Yoon, Minseo and Chandraker, Manmohan and Kim, Hyunwoo J.},
  title         = {{ST-VLM}: Kinematic Instruction Tuning for Spatio-Temporal Reasoning in Vision-Language Models},
  year          = {2025},
  eprint        = {2503.19355},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2503.19355},
}