@inproceedings{bibcite_1091, author = {Supantho Rakshit and Adele Goldberg and Henry Conklin}, title = {Similarity All The Way Up: Multilingual Generalization in LLMs Relies on Language-Level Similarity Structures}, abstract = {
As Large Language Models (LLMs) become more and more successful at a wide range of tasks, their (in)ability to generalize remains difficult to quantify and poorly understood beyond limited domains. In particular, LLMs are known to struggle generalizing multilingually, to languages
poorly attested in their training data. To understand why this may be, and what enables some models to perform better than others, we turn to a long history of work across the cognitive sciences, arguing that successful generalization derives from appropriate representations in similarity space. We look at how well LLMs{\textquoteright} representations capture the hierarchical similarity among distinct languages. Strikingly, we show LLMs{\textquoteright} representations largely recover the hierarchical structure of the Indo-European language family tree -- grouping languages that are members of the same subfamily closely together in representation space. Furthermore, we show that the degree to which models reflect the similarity structure of languages correlates with their performance on XNLI, a multilingual natural language inference benchmark. This extends classic work on similarity-driven generalization at scale, showing how models that represent similar languages similarly generalize better from one language to another.