Speaker
Description
Deconvolution of bulk RNA-seq data for simultaneous estimation of cell-type-specific gene expression profiles (GEPs) and relative cell abundances can be formulated as a constrained matrix factorization problem, but the widely used nonnegative matrix factorization (NMF) framework is mathematically ill-posed, as multiple factorizations may explain the observed bulk expression matrix equally well without further assumptions. Principled approaches for enforcing identifiability and improving estimation robustness remain underdeveloped.
We introduce GSNMF+, an augmented geometric-structure-guided NMF framework that imposes theory-motivated identifiability and solvability constraints on both factor matrices. The method leverages geometric structure associated with marker genes in bulk expression space to stabilize recovery of latent GEPs, and augments the factorization with artificially generated pseudo-bulk samples to strengthen the linear dependence between cellular composition and marker-gene expression. This augmentation improves robustness to instability in the underlying inverse problem. In addition, GSNMF+ includes a component annotation module to facilitate biological interpretation of inferred components. Evaluations on simulated data and realistic Plasmodium bulk RNA-seq datasets show that GSNMF+ accurately recovers latent stage compositions and exhibits greater stability and reliability than existing deconvolution methods across a range of settings.
Bibliography
@article{ChenDuan2022,
  author     = {Chen, Duan and Li, Shaoyu and Wang, Xue},
  title      = {Geometric structure guided model and algorithms for complete deconvolution of gene expression data},
  journal    = {Foundations of Data Science},
  volume     = {4},
  number     = {3},
  pages      = {441--466},
  month      = sep,
  year       = {2022},
  doi        = {10.3934/fods.2022013},
  bdsk-url-1 = {https://doi.org/10.3934/fods.2022013},
}
@article{li2025augmentedgsnmf,
  author  = {Li, S. and Xu, S. and Wang, X. and Ertekin-Taner, N. and Chen, D.},
  title   = {An Augmented {GSNMF} Model for Complete Deconvolution of Bulk {RNA-seq} Data},
  journal = {Mathematical Biosciences and Engineering},
  volume  = {22},
  number  = {4},
  pages   = {988--1018},
  year    = {2025},
  date    = {2025-03-14},
  doi     = {10.3934/mbe.2025036},
  pmid    = {40296800},
  pmcid   = {PMC12043048},
}
@article{Xu2025Robustness,
  author  = {Xu, Su and Chen, Duan and Wang, Xue and Li, Shaoyu},
  title   = {Robustness and resilience of computational deconvolution methods for bulk {RNA} sequencing data},
  journal = {Briefings in Bioinformatics},
  volume  = {26},
  number  = {3},
  pages   = {bbaf264},
  month   = may,
  year    = {2025},
  doi     = {10.1093/bib/bbaf264},
  url     = {https://doi.org/10.1093/bib/bbaf264},
}