Speaker
Description
B cell selection and evolution are key processes in regulating successful adaptive immune responses. Recent advances in single-cell sequencing and deep learning strategies have unlocked new potential to study affinity maturation of B cells at unprecedented scale and resolution. To unravel the complex dynamics of B cell repertoire evolution during immune responses and to facilitate Protein Language Model (PLM)-guided antibody engineering, we created the R package AntibodyForests \cite{van_ginneken_delineating_2025}. AntibodyForests encompasses pipelines to infer B cell lineages, quantify inter- and intra-antibody repertoire evolution, and analyze somatic hypermutation (SHM) using PLMs and protein structure. Using AntibodyForests, we explore how general and antibody-specific PLM-generated likelihoods relate to features of in vivo B cell selection, evolution, antigen specificity and binding affinity \cite{van_ginneken_protein_2025}. We find that PLM likelihoods correlate with biologically relevant features including isotype and V-gene usage, mutational load, and SHM patterns. Additionally, we observed that residues that mutate along evolutionary trajectories tend to have lower PLM likelihoods than conserved residues. These results indicate that PLMs could predict which positions SHM is most likely to mutate and which amino acids it is most likely to introduce. Interestingly, our findings challenge in vitro observations \cite{hie_efficient_2024} by revealing a negative correlation between PLM likelihoods and antigen binding affinity in in vivo repertoires. In our exploration of these discoveries using six different PLMs and varying sequence regions, we uncovered that the region of the antibody sequence (Complementarity-Determining Region 3 (CDR3) or full-length VDJ) provided to the PLM, as well as the type of PLM used, influences the resulting likelihoods. These comparisons emphasize the importance of long-range interactions in PLMs, potential training data biases, and pairing heavy and light chains.
Together, these studies highlight the power of combining repertoire-wide phylogenetic inference with PLMs to better understand the principles governing antibody evolution and selection, and offer new tools for therapeutic antibody discovery and engineering.
Bibliography
% Cited in the abstract for the analysis relating PLM likelihoods to in vivo B cell selection and evolution.
@article{van_ginneken_protein_2025,
  author    = {Van Ginneken, Daphne and Samant, Anamay and Daga-Krumins, Karlis and Gl{\"a}nzer, Wiona and Agrafiotis, Andreas and Kladis, Evgenios and Reddy, Sai T. and Yermanos, Alexander},
  title     = {Protein language model pseudolikelihoods capture features of \textit{in vivo} {B} cell selection and evolution},
  journal   = {Briefings in Bioinformatics},
  volume    = {26},
  number    = {4},
  pages     = {bbaf418},
  month     = jul,
  year      = {2025},
  issn      = {1467-5463, 1477-4054},
  doi       = {10.1093/bib/bbaf418},
  language  = {en},
  copyright = {https://creativecommons.org/licenses/by/4.0/},
  urldate   = {2026-03-17},
}
% Cited in the abstract as the reference for the AntibodyForests R package.
@article{van_ginneken_delineating_2025,
  author    = {Van Ginneken, Daphne and Tromp, Valentijn and Stalder, Lucas and Cotet, Tudor-Stefan and Bakker, Sophie and Samant, Anamay and Reddy, Sai T. and Yermanos, Alexander},
  editor    = {Schwartz, Russell},
  title     = {Delineating inter- and intra-antibody repertoire evolution with {AntibodyForests}},
  journal   = {Bioinformatics},
  volume    = {41},
  number    = {10},
  pages     = {btaf560},
  month     = oct,
  year      = {2025},
  issn      = {1367-4803, 1367-4811},
  doi       = {10.1093/bioinformatics/btaf560},
  language  = {en},
  copyright = {https://creativecommons.org/licenses/by/4.0/},
  urldate   = {2026-03-17},
}
% Cited in the abstract as the source of the in vitro observations that the in vivo findings are contrasted with.
@article{hie_efficient_2024,
  author   = {Hie, Brian L. and Shanker, Varun R. and Xu, Duo and Bruun, Theodora U. J. and Weidenbacher, Payton A. and Tang, Shaogeng and Wu, Wesley and Pak, John E. and Kim, Peter S.},
  title    = {Efficient evolution of human antibodies from general protein language models},
  journal  = {Nature Biotechnology},
  volume   = {42},
  number   = {2},
  pages    = {275--283},
  month    = feb,
  year     = {2024},
  issn     = {1087-0156, 1546-1696},
  doi      = {10.1038/s41587-023-01763-2},
  language = {en},
  urldate  = {2026-03-17},
}