diff --git a/_data/single-cell-transformers.yml b/_data/single-cell-transformers.yml index 3515fe0..0e39321 100644 --- a/_data/single-cell-transformers.yml +++ b/_data/single-cell-transformers.yml @@ -1,3 +1,15 @@ +- model: The Complexity of Automated Cell Type Annotations with GPT-4 + paper: + type: preprint + text: '[Soumya Luthra, et al. 2025](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)' + url: https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2 + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/soulbio/cell_type_annotation)" + url: https://github.com/soulbio/cell_type_annotation + + + - model: BioLLM paper: type: preprint diff --git a/_site/_data/single-cell-transformers.yml b/_site/_data/single-cell-transformers.yml index b5816e3..0e39321 100644 --- a/_site/_data/single-cell-transformers.yml +++ b/_site/_data/single-cell-transformers.yml @@ -1,3 +1,66 @@ +- model: The Complexity of Automated Cell Type Annotations with GPT-4 + paper: + type: preprint + text: '[Soumya Luthra, et al. 2025](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)' + url: https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2 + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/soulbio/cell_type_annotation)" + url: https://github.com/soulbio/cell_type_annotation + + + +- model: BioLLM + paper: + type: preprint + text: '[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)' + url: https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/BGIResearch/BioLLM)" + url: https://github.com/BGIResearch/BioLLM + omic_modalities: '-' + pre_training_dataset: '-' + input_embedding: '-' + architecture: '-' + ssl_tasks: '-' + supervised_tasks: '-' + + + + +- model: scGPT-spatial + paper: + type: preprint + text: '[Chloe Wang, et al. 
2025](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)' + url: https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/bowang-lab/scGPT-spatial)" + url: https://github.com/bowang-lab/scGPT-spatial + omic_modalities: '-' + pre_training_dataset: '-' + input_embedding: '-' + architecture: '-' + ssl_tasks: '-' + supervised_tasks: '-' + +- model: scCello + paper: + type: peer_reviewed + text: '[Yuan, Xinyu, et al. 2024](https://openreview.net/pdf?id=aeYNVtTo7o)' + url: https://openreview.net/pdf?id=aeYNVtTo7o + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/DeepGraphLearning/scCello)" + url: https://github.com/DeepGraphLearning/scCello + omic_modalities: scRNA-seq + pre_training_dataset: 23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/)) + input_embedding: 'Ordering: rank-based' + architecture: Encoder + ssl_tasks: 'Multi-level pre-training: MLM with CE loss for gene level modeling; an ontology-based cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling' + supervised_tasks: 'fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction' + - model: scGREAT paper: type: peer_reviewed @@ -14,6 +77,22 @@ ssl_tasks: '-' supervised_tasks: '-' +- model: MAMMAL + paper: + type: preprint + text: '[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)' + url: https://arxiv.org/abs/2410.22367 + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)" + url: https://github.com/BiomedSciAI/biomed-multi-alignment + omic_modalities: bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences + pre_training_dataset: CELLxGENE Human + input_embedding: '-' + architecture: T5 Encoder-Decoder + ssl_tasks: Expression-ranked gene masking (CELLxGENE Human), Protein LM (UniRef90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING) + supervised_tasks: Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Blood-Brain Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDab), Protein-Protein delta-delta G prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al.) + - model: Nicheformer paper: type: peer_reviewed @@ -145,22 +224,6 @@ supervised_tasks: '-' -- model: scCello - paper: - type: preprint - text: '[Xinyu Yuan et al. 
2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)' - url: https://github.com/theislab/single-cell-transformer-papers/issues/32 - code: - type: '-' - text: "[ð\x9F\x94\x8DGitHub](https://github.com/DeepGraphLearning/scCello)" - url: 'https://github.com/DeepGraphLearning/scCello' - omic_modalities: '-' - pre_training_dataset: '-' - input_embedding: '-' - architecture: '-' - ssl_tasks: '-' - supervised_tasks: '-' - - model: scGenePT paper: type: preprint diff --git a/_site/_data/transformer-evaluation.yml b/_site/_data/transformer-evaluation.yml index bc35cf4..3a09984 100644 --- a/_site/_data/transformer-evaluation.yml +++ b/_site/_data/transformer-evaluation.yml @@ -26,22 +26,6 @@ tasks: '-' notes: '-' - -- paper: - type: preprint - text: '[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)' - url: https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf - code: - type: 'reproducible' - text: '[ð\x9F\x9B\_ï¸\x8FGitHub](https://github.com/ggit12/anndictionary/)' - url: 'https://github.com/ggit12/anndictionary/' - omic_modalities: '-' - evaluated_transformers: '-' - tasks: '-' - notes: '-' - - - - paper: type: preprint text: '[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)' @@ -77,10 +61,10 @@ type: 'reproducible' text: '[ð\x9F\x9B\_ï¸\x8FGitHub](https://github.com/aaronwtr/PertEval)' url: 'https://github.com/aaronwtr/PertEval' - omic_modalities: '-' - evaluated_transformers: '-' - tasks: '-' - notes: '-' + omic_modalities: 'scRNA-seq' + evaluated_transformers: 'UCE, scBERT, scGPT, Geneformer, scFoundation' + tasks: 'Transcriptomic perturbation prediction' + notes: 'Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. 
Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction.' @@ -194,3 +178,15 @@ evaluated_transformers: scGPT, Geneformer, scBERT tasks: Cell type annotation notes: Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly. +- paper: + type: preprint + text: '[Csendes et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)' + url: https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1 + code: + type: reproducible + text: "[\U0001F6E0\uFE0FGitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)" + url: https://github.com/turbine-ai/PerturbSeqPredBenchmark + omic_modalities: scRNA-seq + evaluated_transformers: scGPT + tasks: Genetic perturbation effect prediction + notes: Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation. diff --git a/_site/_pages/implementations.html b/_site/_pages/implementations.html index f9f4e09..7a53da2 100644 --- a/_site/_pages/implementations.html +++ b/_site/_pages/implementations.html @@ -974,9 +974,9 @@

NVIDIA BioNeMo Framewo diff --git a/_site/feed.xml b/_site/feed.xml index 5ccb0cc..2c35e39 100644 --- a/_site/feed.xml +++ b/_site/feed.xml @@ -1 +1 @@ -Jekyll2025-01-21T19:30:38+01:00http://localhost:4000/single-cell-transformer-papers/feed.xmlTransformers in Single-Cell OmicsA curated collection of papers on transformers in single-cell analysis \ No newline at end of file +Jekyll2025-02-25T09:42:38+01:00http://localhost:4000/single-cell-transformer-papers/feed.xmlTransformers in Single-Cell OmicsA curated collection of papers on transformers in single-cell analysis \ No newline at end of file diff --git a/_site/implementations.html b/_site/implementations.html index f9f4e09..7a53da2 100644 --- a/_site/implementations.html +++ b/_site/implementations.html @@ -974,9 +974,9 @@

NVIDIA BioNeMo Framewo diff --git a/_site/index.html b/_site/index.html index cc01bfb..48128a1 100644 --- a/_site/index.html +++ b/_site/index.html @@ -1004,9 +1004,9 @@

Citing this work diff --git a/_site/single-cell-transformers.html b/_site/single-cell-transformers.html index 60e4cc1..9f13571 100644 --- a/_site/single-cell-transformers.html +++ b/_site/single-cell-transformers.html @@ -962,9 +962,9 @@

Navigation

diff --git a/_site/transformer-evaluation.html b/_site/transformer-evaluation.html index 43b9d98..fab3667 100644 --- a/_site/transformer-evaluation.html +++ b/_site/transformer-evaluation.html @@ -962,9 +962,9 @@

Navigation

diff --git a/_site/transformer-llms.html b/_site/transformer-llms.html index a14a742..5e55ef7 100644 --- a/_site/transformer-llms.html +++ b/_site/transformer-llms.html @@ -962,9 +962,9 @@

Navigation