% Bibliography database: machine-learning papers, books, reports, datasets and software.
% Conventions used below: one field per line, brace-delimited values, bare month
% macros, and braces around title words whose capitalisation a style must not change.
% Text outside entries (such as these lines) is ignored by the parser.

% Conference papers ----------------------------------------------------------

% Author list: the given name of Kaiser uses the special-character form for the
% stroked L so that sorting and labels treat it as a single letter.
@inproceedings{vaswani2017attention,
  title     = {Attention Is All You Need},
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and
               Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and
               Kaiser, {\L}ukasz and Polosukhin, Illia},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2017}
}

@inproceedings{he2016resnet,
  title     = {Deep Residual Learning for Image Recognition},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {770--778},
  year      = {2016},
  doi       = {10.1109/CVPR.2016.90},
  url       = {https://doi.org/10.1109/CVPR.2016.90}
}

% NOTE(review): address holds the conference city rather than a publisher
% location, and url points at the volume index rather than the paper page;
% confirm both against the published record.
@inproceedings{smith2024privacy,
  title     = {Privacy-Preserving Training with Low-Precision Secure Aggregation},
  author    = {Smith, Emily and Zhang, Wei and Rossi, Marco and Patel, Neha},
  booktitle = {Proceedings of the 41st International Conference on Machine Learning},
  editor    = {Smith, A. and Johnson, B.},
  series    = {Proceedings of Machine Learning Research},
  volume    = {235},
  pages     = {12345--12367},
  address   = {Vienna, Austria},
  publisher = {PMLR},
  month     = jul,
  year      = {2024},
  url       = {https://proceedings.mlr.press/v235/}
}

% ICLR is a conference, so this is an inproceedings entry with the venue in
% booktitle (it was previously typed as a journal article). The key is unchanged.
@inproceedings{kingma2015adam,
  title         = {{Adam}: A Method for Stochastic Optimization},
  author        = {Kingma, Diederik P. and Ba, Jimmy},
  booktitle     = {International Conference on Learning Representations (ICLR)},
  year          = {2015},
  archiveprefix = {arXiv},
  eprint        = {1412.6980},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1412.6980}
}

% Journal articles -----------------------------------------------------------

@article{silver2017mastering,
  title   = {Mastering the game of {Go} without human knowledge},
  author  = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and
             Antonoglou, Ioannis and Huang, Aja and others},
  journal = {Nature},
  volume  = {550},
  number  = {7676},
  pages   = {354--359},
  year    = {2017},
  month   = oct,
  doi     = {10.1038/nature24270},
  url     = {https://www.nature.com/articles/nature24270}
}

% Books and book chapters ----------------------------------------------------

@book{mckinney2017python,
  title     = {{Python} for Data Analysis},
  author    = {McKinney, Wes},
  publisher = {O'Reilly Media},
  address   = {Sebastopol, CA},
  year      = {2017},
  edition   = {2},
  isbn      = {978-1491957660}
}

@incollection{cover2006entropy,
  title     = {Entropy, Relative Entropy, and Mutual Information},
  author    = {Cover, Thomas M. and Thomas, Joy A.},
  booktitle = {Elements of Information Theory},
  publisher = {Wiley},
  address   = {Hoboken, NJ},
  edition   = {2},
  year      = {2006},
  pages     = {13--55},
  isbn      = {978-0471241959}
}

% Reports and theses ---------------------------------------------------------

% The double braces keep the corporate author as one indivisible name.
@techreport{openai2023gpt4,
  title         = {{GPT-4} Technical Report},
  author        = {{OpenAI}},
  institution   = {OpenAI},
  year          = {2023},
  number        = {arXiv:2303.08774},
  archiveprefix = {arXiv},
  eprint        = {2303.08774},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/2303.08774}
}

% NOTE(review): the DOI below looks like a placeholder (suffix "mit-2020-xyz");
% verify that it resolves before publishing.
@phdthesis{doe2020thesis,
  title   = {Learning Efficient Representations for Large-Scale Visual Recognition},
  author  = {Doe, Jane},
  school  = {Massachusetts Institute of Technology},
  address = {Cambridge, MA},
  year    = {2020},
  doi     = {10.5555/mit-2020-xyz}
}

% Datasets, software and preprints -------------------------------------------

@misc{zenodo2021dataset,
  title        = {{ImageNet-21K} Subset (Version 2.0)},
  author       = {Smith, John and Lee, Alice and Kumar, Ravi},
  year         = {2021},
  howpublished = {Dataset on Zenodo},
  doi          = {10.5281/zenodo.1234567},
  url          = {https://doi.org/10.5281/zenodo.1234567},
  note         = {Accessed 2025-09-01}
}

% The diaeresis in the second author's given name is written as a braced
% accent command so it is typeset and sorted as an accented letter.
@misc{sklearn2024,
  title        = {{scikit-learn}: Machine Learning in {Python} (Version 1.4)},
  author       = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and others},
  year         = {2024},
  howpublished = {Software},
  doi          = {10.5281/zenodo.592264},
  url          = {https://scikit-learn.org}
}

@misc{raffel2020t5,
  title         = {Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer},
  author        = {Raffel, Colin and Shazeer, Noam and Roberts, Adam and
                   Lee, Katherine and Narang, Sharan and others},
  year          = {2020},
  howpublished  = {arXiv preprint},
  archiveprefix = {arXiv},
  eprint        = {1910.10683},
  primaryclass  = {cs.LG},
  doi           = {10.48550/arXiv.1910.10683},
  url           = {https://arxiv.org/abs/1910.10683}
}