% Conference paper (ICASSP 2023 proceedings). The citation key is the exporter's
% opaque identifier and is kept as-is so existing \cite commands still resolve.
%
% The exporter originally packed a copyright notice, the conference title and
% the conference dates into a single `note` string, which styles print verbatim.
% That data now lives in dedicated fields:
%   eventtitle / eventdate - biblatex event fields; unknown to classic BibTeX
%                            styles and therefore silently skipped there.
%   copyright              - not a standard field; kept for record only.
% The exported dates read "04-06-2023 Through 10-06-2023"; they are interpreted
% as day-month-year (4-10 June 2023), since month-day-year would give a
% six-month event.
@inproceedings{93ac37a9d0d94b0b90d1da7460d68b23,
  author     = {Gowda, Shivani and Hu, Yifan and Korpusik, Mandy},
  title      = {Multi-Modal Food Classification in a Diet Tracking System with Spoken and Visual Inputs},
  booktitle  = {{ICASSP} 2023 - 2023 {IEEE} International Conference on Acoustics, Speech and Signal Processing, Proceedings},
  series     = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  year       = {2023},
  eventtitle = {48th {IEEE} International Conference on Acoustics, Speech and Signal Processing, {ICASSP} 2023},
  eventdate  = {2023-06-04/2023-06-10},
  doi        = {10.1109/ICASSP49357.2023.10095762},
  keywords   = {Convolutional Neural Network, Long Short-Term Memory, Transfer Learning, Transformer, Vision-and-Language},
  language   = {English},
  copyright  = {{\textcopyright} 2023 IEEE},
}