diff --git a/_data/single-cell-transformers.yml b/_data/single-cell-transformers.yml
index 3515fe0..0e39321 100644
--- a/_data/single-cell-transformers.yml
+++ b/_data/single-cell-transformers.yml
@@ -1,3 +1,15 @@
+- model: The Complexity of Automated Cell Type Annotations with GPT-4
+  paper:
+    type: preprint
+    text: '[Soumya Luthra, et al. 2025](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)'
+    url: https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2
+  code:
+    type: reproducible
+    text: "[🛠️Github](https://github.com/soulbio/cell_type_annotation)"
+    url: https://github.com/soulbio/cell_type_annotation
+
+
+
 - model: BioLLM
   paper:
     type: preprint
diff --git a/_site/_data/single-cell-transformers.yml b/_site/_data/single-cell-transformers.yml
index b5816e3..0e39321 100644
--- a/_site/_data/single-cell-transformers.yml
+++ b/_site/_data/single-cell-transformers.yml
@@ -1,3 +1,66 @@
+- model: The Complexity of Automated Cell Type Annotations with GPT-4
+  paper:
+    type: preprint
+    text: '[Soumya Luthra, et al. 2025](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)'
+    url: https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2
+  code:
+    type: reproducible
+    text: "[🛠️Github](https://github.com/soulbio/cell_type_annotation)"
+    url: https://github.com/soulbio/cell_type_annotation
+
+
+
+- model: BioLLM
+  paper:
+    type: preprint
+    text: '[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)'
+    url: https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf
+  code:
+    type: reproducible
+    text: "[🛠️Github](https://github.com/BGIResearch/BioLLM)"
+    url: https://github.com/BGIResearch/BioLLM
+  omic_modalities: '-'
+  pre_training_dataset: '-'
+  input_embedding: '-'
+  architecture: '-'
+  ssl_tasks: '-'
+  supervised_tasks: '-'
+
+
+
+
+- model: scGPT-spatial
+  paper:
+    type: preprint
+    text: '[Chloe Wang, et al. 2025](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)'
+    url: https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf
+  code:
+    type: reproducible
+    text: "[🛠️Github](https://github.com/bowang-lab/scGPT-spatial)"
+    url: https://github.com/bowang-lab/scGPT-spatial
+  omic_modalities: '-'
+  pre_training_dataset: '-'
+  input_embedding: '-'
+  architecture: '-'
+  ssl_tasks: '-'
+  supervised_tasks: '-'
+
+- model: scCello
+  paper:
+    type: peer_reviewed
+    text: '[Yuan, Xinyu, et al. 2024](https://openreview.net/pdf?id=aeYNVtTo7o)'
+    url: https://openreview.net/pdf?id=aeYNVtTo7o
+  code:
+    type: reproducible
+    text: "[🛠️Github](https://github.com/DeepGraphLearning/scCello)"
+    url: https://github.com/DeepGraphLearning/scCello
+  omic_modalities: scRNA-seq
+  pre_training_dataset: 23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))
+  input_embedding: 'Ordering: rank-based'
+  architecture: Encoder
+  ssl_tasks: 'Multi-level pre-training: MLM with CE loss for gene level modeling; an ontology-based cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling'
+  supervised_tasks: 'fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction'
+
 - model: scGREAT
   paper:
     type: peer_reviewed
@@ -14,6 +77,22 @@
   ssl_tasks: '-'
   supervised_tasks: '-'
 
+- model: MAMMAL
+  paper:
+    type: preprint
+    text: '[Shoshan et al. 2024](https://arxiv.org/abs/2410.22367)'
+    url: https://arxiv.org/abs/2410.22367
+  code:
+    type: reproducible
+    text: "[🛠️GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)"
+    url: https://github.com/BiomedSciAI/biomed-multi-alignment
+  omic_modalities: bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences
+  pre_training_dataset: CELLxGENE Human
+  input_embedding: '-'
+  architecture: T5 Encoder-Decoder
+  ssl_tasks: Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)
+  supervised_tasks: Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Blood-Brain Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), Drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDab), Protein-Protein delta-delta G prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al.)
+
 - model: Nicheformer
   paper:
     type: peer_reviewed
@@ -145,22 +224,6 @@
   supervised_tasks: '-'
 
 
-- model: scCello
-  paper:
-    type: preprint
-    text: '[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)'
-    url: https://github.com/theislab/single-cell-transformer-papers/issues/32
-  code:
-    type: '-'
-    text: "[ð\x9F\x94\x8DGitHub](https://github.com/DeepGraphLearning/scCello)"
-    url: 'https://github.com/DeepGraphLearning/scCello'
-  omic_modalities: '-'
-  pre_training_dataset: '-'
-  input_embedding: '-'
-  architecture: '-'
-  ssl_tasks: '-'
-  supervised_tasks: '-'
-
 - model: scGenePT
   paper:
     type: preprint
diff --git a/_site/_data/transformer-evaluation.yml b/_site/_data/transformer-evaluation.yml
index bc35cf4..3a09984 100644
--- a/_site/_data/transformer-evaluation.yml
+++ b/_site/_data/transformer-evaluation.yml
@@ -26,22 +26,6 @@
   tasks: '-'
   notes: '-'
 
-
-- paper:
-    type: preprint
-    text: '[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)'
-    url: https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf
-  code:
-    type: 'reproducible'
-    text: '[ð\x9F\x9B\_ï¸\x8FGitHub](https://github.com/ggit12/anndictionary/)'
-    url: 'https://github.com/ggit12/anndictionary/'
-  omic_modalities: '-'
-  evaluated_transformers: '-'
-  tasks: '-'
-  notes: '-'
-
-
-
 - paper:
     type: preprint
     text: '[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)'
@@ -77,10 +61,10 @@
     type: 'reproducible'
     text: '[ð\x9F\x9B\_ï¸\x8FGitHub](https://github.com/aaronwtr/PertEval)'
     url: 'https://github.com/aaronwtr/PertEval'
-  omic_modalities: '-'
-  evaluated_transformers: '-'
-  tasks: '-'
-  notes: '-'
+  omic_modalities: 'scRNA-seq'
+  evaluated_transformers: 'UCE, scBERT, scGPT, Geneformer, scFoundation'
+  tasks: 'Transcriptomic perturbation prediction'
+  notes: 'Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction.'
 
 
 
@@ -194,3 +178,15 @@
   evaluated_transformers: scGPT, Geneformer, scBERT
   tasks: Cell type annotation
   notes: Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly.
+- paper:
+    type: preprint
+    text: '[Csendes et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)'
+    url: https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1
+  code:
+    type: reproducible
+    text: "[🛠️GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)"
+    url: https://github.com/turbine-ai/PerturbSeqPredBenchmark
+  omic_modalities: scRNA-seq
+  evaluated_transformers: scGPT
+  tasks: Genetic perturbation effect prediction
+  notes: Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation.
diff --git a/_site/_pages/implementations.html b/_site/_pages/implementations.html
index f9f4e09..7a53da2 100644
--- a/_site/_pages/implementations.html
+++ b/_site/_pages/implementations.html
@@ -974,9 +974,9 @@ <h3><a href="https://github.com/NVIDIA/bionemo-framework">NVIDIA BioNeMo Framewo
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->
diff --git a/_site/feed.xml b/_site/feed.xml
index 5ccb0cc..2c35e39 100644
--- a/_site/feed.xml
+++ b/_site/feed.xml
@@ -1 +1 @@
-<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.3.4">Jekyll</generator><link href="http://localhost:4000/single-cell-transformer-papers/feed.xml" rel="self" type="application/atom+xml" /><link href="http://localhost:4000/single-cell-transformer-papers/" rel="alternate" type="text/html" /><updated>2025-01-21T19:30:38+01:00</updated><id>http://localhost:4000/single-cell-transformer-papers/feed.xml</id><title type="html">Transformers in Single-Cell Omics</title><subtitle>A curated collection of papers on transformers in single-cell analysis</subtitle></feed>
\ No newline at end of file
+<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom" ><generator uri="https://jekyllrb.com/" version="4.3.4">Jekyll</generator><link href="http://localhost:4000/single-cell-transformer-papers/feed.xml" rel="self" type="application/atom+xml" /><link href="http://localhost:4000/single-cell-transformer-papers/" rel="alternate" type="text/html" /><updated>2025-02-25T09:42:38+01:00</updated><id>http://localhost:4000/single-cell-transformer-papers/feed.xml</id><title type="html">Transformers in Single-Cell Omics</title><subtitle>A curated collection of papers on transformers in single-cell analysis</subtitle></feed>
\ No newline at end of file
diff --git a/_site/implementations.html b/_site/implementations.html
index f9f4e09..7a53da2 100644
--- a/_site/implementations.html
+++ b/_site/implementations.html
@@ -974,9 +974,9 @@ <h3><a href="https://github.com/NVIDIA/bionemo-framework">NVIDIA BioNeMo Framewo
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->
diff --git a/_site/index.html b/_site/index.html
index cc01bfb..48128a1 100644
--- a/_site/index.html
+++ b/_site/index.html
@@ -1004,9 +1004,9 @@ <h2 id="citing-this-work"><span class="text-center block">Citing this work</span
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->
diff --git a/_site/single-cell-transformers.html b/_site/single-cell-transformers.html
index 60e4cc1..9f13571 100644
--- a/_site/single-cell-transformers.html
+++ b/_site/single-cell-transformers.html
@@ -962,9 +962,9 @@ <h2 class="text-2xl font-bold mb-8">Navigation</h2>
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->
diff --git a/_site/transformer-evaluation.html b/_site/transformer-evaluation.html
index 43b9d98..fab3667 100644
--- a/_site/transformer-evaluation.html
+++ b/_site/transformer-evaluation.html
@@ -962,9 +962,9 @@ <h2 class="text-2xl font-bold mb-8">Navigation</h2>
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","url":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->
diff --git a/_site/transformer-llms.html b/_site/transformer-llms.html
index a14a742..5e55ef7 100644
--- a/_site/transformer-llms.html
+++ b/_site/transformer-llms.html
@@ -962,9 +962,9 @@ <h2 class="text-2xl font-bold mb-8">Navigation</h2>
 
     <!-- Include the data directly -->
     <script type="text/javascript">
-        window.singleCellTransformers = [{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 
2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://github.com/theislab/single-cell-transformer-papers/issues/32)","url":"https://github.com/theislab/single-cell-transformer-papers/issues/32"},"code":{"type":"-","text":"[ðGitHub](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 
2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 
2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
+        window.singleCellTransformers = [{"model":"The Complexity of Automated Cell Type Annotations with GPT-4","paper":{"type":"preprint","text":"[Soumya Luthra, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2)","url":"https://www.biorxiv.org/content/10.1101/2025.02.11.637659v2"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/soulbio/cell_type_annotation)","url":"https://github.com/soulbio/cell_type_annotation"}},{"model":"BioLLM","paper":{"type":"preprint","text":"[Ping Qiu, et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.22.624786v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BGIResearch/BioLLM)","url":"https://github.com/BGIResearch/BioLLM"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGPT-spatial","paper":{"type":"preprint","text":"[Chloe Wang, et al. 2024](https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.02.05.636714v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/bowang-lab/scGPT-spatial)","url":"https://github.com/bowang-lab/scGPT-spatial"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scCello","paper":{"type":"peer_reviewed","text":"[Yuan, Xinyu, et al. 
2024](https://openreview.net/pdf?id=aeYNVtTo7o)","url":"https://openreview.net/pdf?id=aeYNVtTo7o"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/DeepGraphLearning/scCello)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"scRNA-seq","pre_training_dataset":"23M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"Multi-level pre-training: MLM with CE loss for gene level modeling; an ontologybased cell-type coherence loss for intra-cellular level modeling; a relational alignment loss to inject cell-type lineage from cell ontology graph for inter-cellular level modeling","supervised_tasks":"fine-tuning tasks: cell type classification; zero-shot tasks: cell type annotation, marker gene prediction, novel cell type prediction, cancer drug prediction"},{"model":"scGREAT","paper":{"type":"peer_reviewed","text":"[Yuchen Wang et al. 2024](https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf)","url":"https://www.cell.com/iscience/pdf/S2589-0042%2824%2900573-X.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/WangyuchenCS/scGREAT)","url":"https://github.com/WangyuchenCS/scGREAT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"MAMMAL","paper":{"type":"preprint","text":"[Shoshan et al. 
2024](https://arxiv.org/abs/2410.22367)","url":"https://arxiv.org/abs/2410.22367"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/biomed-multi-alignment)","url":"https://github.com/BiomedSciAI/biomed-multi-alignment"},"omic_modalities":"bulk/scRNA-seq, amino acid sequences, SMILES molecule sequences","pre_training_dataset":"CellXGene Human","input_embedding":"-","architecture":"T5 Encoder-Decoder","ssl_tasks":"Expression-ranked gene masking (CELLxGENE Human), Protein LM (Uniref90), Antibody LM (OAS), Antibody Denoising (OAS), Small-Molecule LM (ZINC), Protein Interaction LM (STRING)","supervised_tasks":"Cell type annotation (zheng68k), Cancer drug response prediction (GDSC1/2/3), Brain Blood Barrier Penetration prediction (MoleculeNet), Small-Molecule toxicity prediction (MoleculeNet), drug clinical trial result prediction (MoleculeNet), Antibody-Antigen binding prediction (HER2), Targeted antibody generation (SAbDAb), Protein-Protein delta-delta G  prediction (SKEMPI v2), Drug-Target interaction prediction (PEER), TCR binding prediction (Weber et al)"},{"model":"Nicheformer","paper":{"type":"peer_reviewed","text":"[Anna C. Schaar et al. 2024](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291)","url":"https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4803291"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/theislab/nicheformer)","url":"https://github.com/theislab/nicheformer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scmFormer","paper":{"type":"preprint","text":"[Jing Xu et al. 
2024](https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/)","url":"https://pmc.ncbi.nlm.nih.gov/articles/PMC11109621/"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/zhanglab-wbgcas/scmFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Metric Mirages","paper":{"type":"preprint","text":"[Hanchen Wang et al. 2024](https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.04.02.587824v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scEmb","paper":{"type":"preprint","text":"[Kang-Lin Hsieh et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.24.614685v1"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-ontology guided transcriptome foundation model","paper":{"type":"preprint","text":"[Xinyu Yuan et al. 2024](https://arxiv.org/pdf/2408.12373)","url":"https://arxiv.org/pdf/2408.12373"},"code":{"type":"-","text":"[](https://github.com/zhanglab-wbgcas/scmFormer)","url":"https://github.com/DeepGraphLearning/scCello"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cell-Graph Compass","paper":{"type":"preprint","text":"[Chen Fang et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.06.04.597354v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/epang-ucas/Cell-Graph-Compass)","url":"https://github.com/epang-ucas/Cell-Graph-Compass"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGAA","paper":{"type":"peer_reviewed","text":"[Tianci Kong et al. 2024](https://www.nature.com/articles/s41598-024-73356-1)","url":"https://www.nature.com/articles/s41598-024-73356-1"},"code":{"type":"evaluation_only","text":"[ðGitHub]https://github.com/kongtianci/scGAA)","url":"https://github.com/kongtianci/scGAA"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scFusionTTT","paper":{"type":"preprint","text":"[Dian Meng et al. 2024](https://openreview.net/forum?id=7rR6RAUaoC)","url":"https://openreview.net/forum?id=7rR6RAUaoC"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scGenePT","paper":{"type":"preprint","text":"[Ana-Maria Istrate et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.23.619972v1.full.pdf"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/czi-ai/scGenePT)","url":"https://github.com/czi-ai/scGenePT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"ENHANCING GENERATIVE PERTURBATION MODELS WITH LLM-INFORMED GENE EMBEDDINGS","paper":{"type":"preprint","text":"[Kaspar Märtens et al. 
2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scSwinTNet","paper":{"type":"peer_reviewed","text":"[Huanhuan Dai et al. 2024](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051)","url":"https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10737051"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/scSwinTNet)","url":"https://github.com/Danica123/scSwinTNet"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sclong","paper":{"type":"preprint","text":"[Ding Bai et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2)","url":"https://www.biorxiv.org/content/10.1101/2024.11.09.622759v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BaiDing1234/scLong)","url":"https://github.com/BaiDing1234/scLong"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"WHITE-BOX DIFFUSION TRANSFORMER FOR SINGLE-CELL RNA-SEQ GENERATION","paper":{"type":"preprint","text":"[Zhuorui Cui et al. 2024](https://arxiv.org/pdf/2411.06785)","url":"https://arxiv.org/pdf/2411.06785"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"A framework for gene representation on spatial transcriptomics","paper":{"type":"preprint","text":"[Shenghao Cao et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v5.full"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"genohoption","paper":{"type":"preprint","text":"[Jiabei Cheng et al. 2024](https://arxiv.org/pdf/2411.06331)","url":"https://arxiv.org/pdf/2411.06331"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Bunnybeibei/GenoHoption)","url":"https://github.com/Bunnybeibei/GenoHoption"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Cellpatch","paper":{"type":"preprint","text":"[Hanwen Zhu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.11.15.623701v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/HanwenZhu98/CellPatch)","url":"https://github.com/HanwenZhu98/CellPatch"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"GRNPT","paper":{"type":"preprint","text":"[Guangzheng Weng et al. 2024](https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990)","url":"https://advanced.onlinelibrary.wiley.com/doi/pdf/10.1002/advs.202409990"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://github.com/wgzgithub/GRNPT"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Aido.cell","paper":{"type":"preprint","text":"[Nicholas Ho et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full)","url":"https://www.biorxiv.org/content/10.1101/2024.11.28.625303v1.full"},"code":{"type":"evaluation_only","text":"[ð\\x9F\\x94\\x8DGitHub](https://github.com/wgzgithub/GRNPT)","url":"https://huggingface.co/genbio-ai/AIDO.Cell-100M/tree/main"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"sctel","paper":{"type":"peer_reviewed","text":"[Yuanyuan Chen et al. 2024](https://www.nature.com/articles/s41540-024-00484-9)","url":"https://www.nature.com/articles/s41540-024-00484-9"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/142857cyy/scTEL)","u'rl":"https://github.com/142857cyy/scTEL"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"Toward a privacy-preserving predictive foundation model","paper":{"type":"preprint","text":"[Jiayuan Ding et al. 2024](https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2025.01.06.631427v1.full.pdf"},"code":{"type":"-","text":"-","url":"-"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"mcBERT","paper":{"type":"preprint","text":"[von Querfurth et al. 
2024](https://doi.org/10.1101/2024.11.04.621897)","url":"https://doi.org/10.1101/2024.11.04.621897"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/COMSYS/mcBERT)","url":"https://github.com/COMSYS/mcBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"7M cells single tissue, human (see Table 1 in the paper; heart, kidney, PBMC, and lung)","input_embedding":"Cells as tokens + value projection","architecture":"Transformer Encoder / BERT","ssl_tasks":"MLM on cell-level, based on unmasked cells of patient","supervised_tasks":"Phenotype classification"},{"model":"CancerFoundation","paper":{"type":"preprint","text":"[Theus et al. 2024](https://doi.org/10.1101/2024.11.01.621087)","url":"https://doi.org/10.1101/2024.11.01.621087"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/BoevaLab/CancerFoundation)","url":"https://github.com/BoevaLab/CancerFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / Only malignant cells from the [Curated Cancer Cell Atlas](https://www.nature.com/articles/s41586-023-06130-4)","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Drug response prediction"},{"model":"Precious3GPT","paper":{"type":"preprint","text":"[Galkin et al. 
2024](https://doi.org/10.1101/2024.07.25.605062)","url":"https://doi.org/10.1101/2024.07.25.605062"},"code":{"type":"evaluation_only","text":"[ðï¸ð¤](https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal)","url":"https://huggingface.co/insilicomedicine/precious3-gpt-multi-modal"},"omic_modalities":"Bulk/scRNA-seq, DNAm, proteomics, natural language annotations","pre_training_dataset":"Omics data with KG and text embeddings, Closed source","input_embedding":"?","architecture":"Decoder-only LLaMA-like transformer model with modality mapper units","ssl_tasks":"Emulation of chemical response, cross-species/tissue/omics transference, emulation of clinical conditions","supervised_tasks":"Age prediction, gene classification"},{"model":"LangCell","paper":{"type":"peer_reviewed","text":"[Zhao et al. 2024](https://arxiv.org/abs/2405.06708)","url":"https://arxiv.org/abs/2405.06708"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/PharMolix/LangCell)","url":"https://github.com/PharMolix/LangCell"},"omic_modalities":"scRNA-seq, natural language","pre_training_dataset":"27M / cross-tissue, human ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Ordering: rank-based, natural language cell description","architecture":"Other: two encoders (cell and text)","ssl_tasks":"MLM with CE loss, intra- and inter-modal contrastive loss, cell-text matching with CE loss","supervised_tasks":"Cell type annotation, pathway identification"},{"model":"ScRAT","paper":{"type":"peer_reviewed","text":"[Mao et al. 
2024](https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064)","url":"https://academic.oup.com/bioinformatics/article/40/2/btae067/7613064"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/yuzhenmao/ScRAT)","url":"https://github.com/yuzhenmao/ScRAT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Cells as tokens","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Phenotype prediction: aggregated per sample cell embeddings are used to predict sample label (e.g., health condition)"},{"model":"scPRINT","paper":{"type":"preprint","text":"[Kalfon et al. 2024](https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1)","url":"https://www.biorxiv.org/content/10.1101/2024.07.29.605556v1"},"code":{"type":"reproducible","text":"[ð ï¸Github](https://github.com/cantinilab/scPRINT)","url":"https://github.com/cantinilab/scPRINT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are randomly sampled and order determined by position on chromosomes","architecture":"Encoder","ssl_tasks":"Multi task Pre-training: Denoising, Botleneck learning (+ many additional losses available)","supervised_tasks":"Cell label prediction (these supervised tasks are part of the pre-training)"},{"model":"scMulan","paper":{"type":"peer_reviewed","text":"[Bian et al. 
2024](https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57)","url":"https://link.springer.com/chapter/10.1007/978-1-0716-3989-4_57"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/SuperBianC/scMulan/tree/main)","url":"https://github.com/SuperBianC/scMulan/tree/main"},"omic_modalities":"scRNA-seq","pre_training_dataset":"10M / cross-tissue, human ([hECA](https://www.sciencedirect.com/science/article/pii/S2589004222005892))","input_embedding":"Not specified","architecture":"Decoder","ssl_tasks":"Conditional cell generation","supervised_tasks":"cell type annotation, cell metadata annotation (both also used in training)"},{"model":"BioFormers","paper":{"type":"preprint","text":"[Belgadi and Li et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.29.569320v1.full.pdf"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"8K / single tissue, human ([PBMC](https://docs.scvi-tools.org/en/stable/api/reference/scvi.data.pbmc_dataset.html), [Adamson et al. 2016](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE90546))","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"-"},{"model":"Geneformer","paper":{"type":"peer_reviewed","text":"[Theodoris et al. 
2023](https://www.nature.com/articles/s41586-023-06139-9)","url":"https://www.nature.com/articles/s41586-023-06139-9"},"code":{"type":"reproducible","text":"[ð ð¤](https://huggingface.co/ctheodoris/Geneformer)","url":"https://huggingface.co/ctheodoris/Geneformer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, human ([Genecorpus](https://huggingface.co/datasets/ctheodoris/Genecorpus-30M))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene ID prediction","supervised_tasks":"Gene function prediction, cell annotation"},{"model":"Universal Cell Embedding","paper":{"type":"preprint","text":"[Rosen et al. 2023](https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.11.28.568918v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/snap-stanford/uce)","url":"https://github.com/snap-stanford/uce"},"omic_modalities":"scRNA-seq","pre_training_dataset":"36M / cross-tissue, cross-species ([CELLxGENE](https://cellxgene.cziscience.com/) and [other](https://www.biorxiv.org/content/biorxiv/early/2023/11/29/2023.11.28.568918/DC3/embed/media-3.xlsx?download=true))","input_embedding":"Other: [ESM-2](https://www.science.org/doi/10.1126/science.ade2574) based gene embeddings. Gene embeddings are sampled according to expression levels and order determined by position on chromosomes.","architecture":"Encoder","ssl_tasks":"Modified MLM, binary CE loss predicting whether a gene is expressed or not. Uses CLS embedding instead of token-embeddings.","supervised_tasks":"Cell annotation"},{"model":"scGPT","paper":{"type":"peer_reviewed","text":"[Cui et al. 
2024](https://www.nature.com/articles/s41592-024-02201-0)","url":"https://www.nature.com/articles/s41592-024-02201-0"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/bowang-lab/scGPT)","url":"https://github.com/bowang-lab/scGPT"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq, Spatial transcriptomics","pre_training_dataset":"33M / cross-tissue, human, non-disease ([CELLxGENE](https://cellxgene.cziscience.com/))","input_embedding":"Value categorization: value binning","architecture":"Other: attention masking in encoder","ssl_tasks":"Iterative MLM variant with MSE loss, cell token expression prediction, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction, reverse perturbation prediction, cell clustering, multimodal embedding, gene function prediction"},{"model":"TOSICA","paper":{"type":"peer_reviewed","text":"[Chen et al. 2023](https://www.nature.com/articles/s41467-023-35923-4)","url":"https://www.nature.com/articles/s41467-023-35923-4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/JackieHanLab/TOSICA)","url":"https://github.com/JackieHanLab/TOSICA"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scMoFormer","paper":{"type":"peer_reviewed","text":"[Tang et al. 2023](https://dl.acm.org/doi/10.1145/3583780.3615061)","url":"https://dl.acm.org/doi/10.1145/3583780.3615061"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/OmicsML/scMoFormer)","url":"https://github.com/OmicsML/scMoFormer"},"omic_modalities":"scRNA-seq, scATAC-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Other, SVD-based","architecture":"Encoder and graph transformers","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"tGPT","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2023](https://www.cell.com/iscience/pdf/S2589-0042(23)","url":"https://www.cell.com/iscience/pdf/S2589-0042(23"},"code":{"type":"reproducible","text":"[ð GitHubï¸](https://github.com/deeplearningplus/tGPT)","url":"https://github.com/deeplearningplus/tGPT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"22M / cross-tissue, cross-species, disease and non-disease, organoids ([list](https://www.cell.com/cms/10.1016/j.isci.2023.106536/attachment/1e95114b-5ea0-4596-afd8-0cb04bae0f6d/mmc2))","input_embedding":"Ordering","architecture":"Decoder","ssl_tasks":"NTP with CE loss, gene ID prediction","supervised_tasks":"-"},{"model":"SpaFormer","paper":{"type":"preprint","text":"[Wen et al. 2023](https://arxiv.org/abs/2302.03038)","url":"https://arxiv.org/abs/2302.03038"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/wehos/CellT)","url":"https://github.com/wehos/CellT"},"omic_modalities":"Spatial transcriptomics","pre_training_dataset":"-","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Gene expression imputation"},{"model":"scFoundation","paper":{"type":"peer_reviewed","text":"[Hao et al. 
2024](https://www.nature.com/articles/s41592-024-02305-7)","url":"https://www.nature.com/articles/s41592-024-02305-7"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/biomap-research/scFoundation)","url":"https://github.com/biomap-research/scFoundation"},"omic_modalities":"scRNA-seq","pre_training_dataset":"50M / cross-tissue, human, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/))","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"Drug response prediction, genetic perturbation effect prediction"},{"model":"CellLM","paper":{"type":"preprint","text":"[Zhao et al. 2023](https://arxiv.org/abs/2306.04371)","url":"https://arxiv.org/abs/2306.04371"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/PharMolix/OpenBioMed)","url":"https://github.com/PharMolix/OpenBioMed"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1.8M / cross-tissue, human, disease and non-disease ([PanglaoDB](https://panglaodb.se/), [CancerSCEM](https://ngdc.cncb.ac.cn/cancerscem/))","input_embedding":"Value categorization","architecture":"Encoder","ssl_tasks":"Contrastive loss, MLM with CE loss","supervised_tasks":"Non-disease vs cancer prediction, cell type annotation, drug response prediction"},{"model":"scCLIP","paper":{"type":"preprint","text":"[Xiong et al. 
2023](https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1)","url":"https://openreview.net/forum?id=KMtM5ZHxct&referrer=%5Bthe%20profile%20of%20Tianlong%20Chen%5D(%2Fprofile%3Fid%3D~Tianlong_Chen1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://anonymous.4open.science/r/scCLIP-61F6/README.md)","url":"https://anonymous.4open.science/r/scCLIP-61F6/README.md"},"omic_modalities":"scRNA-seq, scATAC-seq","pre_training_dataset":"377k / cross-tissue, human fetal ([ATAC](https://www.science.org/doi/10.1126/science.aba7612), [RNA](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7780123/))","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"Contrastive loss, CE matching modalities","supervised_tasks":"-"},{"model":"GeneCompass","paper":{"type":"preprint","text":"[Yang et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1)","url":"https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1"},"code":{"type":"reproducible","text":"[ð  GitHub](https://github.com/xCompass-AI/GeneCompass)","url":"https://github.com/xCompass-AI/GeneCompass"},"omic_modalities":"scRNA-seq","pre_training_dataset":"126M / cross-tissue, human and mouse, disease and non-disease ([GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [CELLxGENE](https://cellxgene.cziscience.com/), [GSA](https://ngdc.cncb.ac.cn/gsa/), [Single Cell Portal](https://singlecell.broadinstitute.org/single_cell), [HCA](https://data.humancellatlas.org/), [EMBL-EBI](https://www.ebi.ac.uk/), [3CA](https://www.weizmann.ac.il/sites/3CA/), [Cell BLAST](https://cblast.gao-lab.org/), [TEDD](https://TEDD.obg.cuhk.edu.hk/),  and [other](https://www.biorxiv.org/content/10.1101/2023.09.26.559542v1.full))","input_embedding":"?","architecture":"Other: two encoders","ssl_tasks":"MLM with CE and MSE loss, gene ID and expression prediction","supervised_tasks":"Cell type annotation, drug 
response prediction, gene function prediction"},{"model":"CellPLM","paper":{"type":"peer_reviewed","text":"[Wen et al. 2024](https://openreview.net/forum?id=BKXvPDekud)","url":"https://openreview.net/forum?id=BKXvPDekud"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/OmicsML/CellPLM)","url":"https://github.com/OmicsML/CellPLM"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"11M / cross-tissue, human, disease and non-disease ([HTCA](https://humantumoratlas.org/), [HCA](https://data.humancellatlas.org/), [GEO](https://www.ncbi.nlm.nih.gov/geo/))","input_embedding":"Cells as tokens, value projection","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE loss and KL losses, gene expression prediction","supervised_tasks":"Gene expression imputation, cell type annotation, genetic perturbation effect prediction"},{"model":"scMAE","paper":{"type":"preprint","text":"[Kim et al. 2023](https://openreview.net/pdf?id=2mq6uezuGj)","url":"https://openreview.net/pdf?id=2mq6uezuGj"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"single-cell flow cytometry","pre_training_dataset":"6.5M / human, disease and non-disease (source?)","input_embedding":"Other, concatenation of values with learnable protein embeddings","architecture":"Other: two encoders","ssl_tasks":"MLM with MSE loss, protein expression prediction","supervised_tasks":"Cell type annotation, protein expression imputation"},{"model":"CAN/CGRAN","paper":{"type":"peer_reviewed","text":"[Wang et al. 2023](https://ebooks.iospress.nl/volumearticle/64489)","url":"https://ebooks.iospress.nl/volumearticle/64489"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scTranslator","paper":{"type":"preprint","text":"[Liu et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.04.547619v2.full"},"code":{"type":"evaluation_only","text":"[ðï¸GitHub](https://github.com/TencentAILabHealthcare/sctranslator)","url":"https://github.com/TencentAILabHealthcare/sctranslator"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Other: two encoders","ssl_tasks":"-","supervised_tasks":"Cross-modality prediction"},{"model":"scTransSort","paper":{"type":"peer_reviewed","text":"[Jiao et al. 2023](https://www.mdpi.com/2218-273X/13/4/611)","url":"https://www.mdpi.com/2218-273X/13/4/611"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/jiaojiao-123/scTransSort)","url":"https://github.com/jiaojiao-123/scTransSort"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"STGRNS","paper":{"type":"peer_reviewed","text":"[Xu et al. 2023](https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621)","url":"https://academic.oup.com/bioinformatics/article/39/4/btad165/7099621"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/STGRNS)","url":"https://github.com/zhanglab-wbgcas/STGRNS"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Other","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"GRN inference"},{"model":"CIForm","paper":{"type":"peer_reviewed","text":"[Xu et al. 
2023](https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext)","url":"https://academic.oup.com/bib/article-abstract/24/4/bbad195/7169137?redirectedFrom=fulltext"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/zhanglab-wbgcas/CIForm)","url":"https://github.com/zhanglab-wbgcas/CIForm"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scFormer","paper":{"type":"preprint","text":"[Cui et al. 2023](https://openreview.net/forum?id=7hdmA0qtr5)","url":"https://openreview.net/forum?id=7hdmA0qtr5"},"code":{"type":"partial","text":"[ï¸GitHub](https://github.com/bowang-lab/scFormer)","url":"https://github.com/bowang-lab/scFormer"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Task specific","input_embedding":"Value categorization: value binning","architecture":"Encoder","ssl_tasks":"Modified MLM with CE, cell token expression prediction, contrastive loss with cosine similarity, gene expression prediction","supervised_tasks":"Cell type annotation, genetic perturbation effect prediction"},{"model":"Exceiver","paper":{"type":"preprint","text":"[Connell et al. 2022](https://openreview.net/forum?id=XxRuCIgq2LX)","url":"https://openreview.net/forum?id=XxRuCIgq2LX"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/keiserlab/exceiver)","url":"https://github.com/keiserlab/exceiver"},"omic_modalities":"scRNA-seq","pre_training_dataset":"0.5M / cross-tissue, human ([Tabula Sapiens](https://figshare.com/ndownloader/files/34702114))","input_embedding":"Other: value scaled embeddings","architecture":"Encoder","ssl_tasks":"Modified MLM with MSE, gene expression prediction","supervised_tasks":"Cell type annotation, drug response prediction"},{"model":"TransCluster","paper":{"type":"peer_reviewed","text":"[Song et al. 
2022](https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full)","url":"https://www.frontiersin.org/articles/10.3389/fgene.2022.1038919/full"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/Danica123/TransCluster)","url":"https://github.com/Danica123/TransCluster"},"omic_modalities":"scRNA-seq","pre_training_dataset":"-","input_embedding":"Value projection with LDA","architecture":"Encoder","ssl_tasks":"-","supervised_tasks":"Cell type annotation"},{"model":"scBERT","paper":{"type":"peer_reviewed","text":"[Yang et al. 2022](https://www.nature.com/articles/s42256-022-00534-z)","url":"https://www.nature.com/articles/s42256-022-00534-z"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/TencentAILabHealthcare/scBERT)","url":"https://github.com/TencentAILabHealthcare/scBERT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"1M / cross-tissue, human ([PanglaoDB](https://panglaodb.se/))","input_embedding":"Value categorization, binning","architecture":"Encoder","ssl_tasks":"MLM with CE loss, gene expression prediction","supervised_tasks":"Cell type annotation, unseen cell type detection"},{"model":"iSEEEK","paper":{"type":"peer_reviewed","text":"[Shen et al. 
2022](https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false)","url":"https://academic.oup.com/bib/article/23/2/bbab573/6511497?login=false"},"code":{"type":"evaluation_only","text":"[ðGithub](https://github.com/lixiangchun/iSEEEK)","url":"https://github.com/lixiangchun/iSEEEK"},"omic_modalities":"scRNA-seq","pre_training_dataset":"11.9M / cross-tissue, cross-species ([list](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/bib/23/2/10.1093_bib_bbab573/1/supplementary_table_1_data_source_information_bbab573.xlsx?Expires=1710130756&Signature=EKqLNLeqpYl320~rRpsK79fA4RC1KvQcWeYqpGALwIQ2SK-IAqZmyXnU-tuYG4xzZqeOhjKc2oyhqBRkO8xJBGO~EMja5KhbNP8PS4nV1eFCrZW6GvbluqyqCP9v-z2ExYhSo3f4jisWi9irRL0y7fxIPk3dPV4f3NACrRjDinqpUZ2eJTKOWwQ8GTbL3dySxPNJa8XQ0y2lGv0lBO~KMG-PJROYDi-PQyHHkTz5AbAnhpD5jnK1BSdxNg8oFuIErS7U0ej8V86eA3AqZyFdI4RZtq-iPTkAEFSQROhQdv1aTh3Cj5EuIZvVgmivr~qF571C9m4IHnI0bEsrhI4f3w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA))","input_embedding":"Ordering: rank-based","architecture":"Encoder","ssl_tasks":"MLM with CE loss","supervised_tasks":"Marker gene classification"},{"model":"Multitask learning","paper":{"type":"preprint","text":"[Pang et al. 2020](https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2)","url":"https://www.biorxiv.org/content/10.1101/2020.02.05.935239v2"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"160k / brain, mouse ([MBA](http://mousebrain.org/))","input_embedding":"Value projection","architecture":"Other: autoencoder with two transformer encoders (?)","ssl_tasks":"Modified MLM with MSE loss, gene expression prediction","supervised_tasks":"-"}];
         window.transformerLLMs = [{"model":"stFormer","paper":{"type":"preprint","text":"[Shenghao Cao et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.27.615337v1"},"code":{"type":"reproducible","text":"[ðGitHub](https://github.com/ucaswangls/STFormer)","url":"https://github.com/ucaswangls/STFormer"},"omic_modalities":"-","pre_training_dataset":"-","input_embedding":"-","architecture":"-","ssl_tasks":"-","supervised_tasks":"-"},{"model":"scChat","paper":{"type":"preprint","text":"[Lu et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.01.616063v2.abstract"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/li-group/scChat)","url":"https://github.com/li-group/scChat"},"omic_modalities":"scRNA-seq","pre_training_dataset":"[GPT-4o](https://api.openai.com/)","input_embedding":"Other: Natural language descriptions","architecture":"[GPT-4o](https://api.openai.com/)","ssl_tasks":"-","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation, research hypothesis validation and generation, experiment design suggestions"},{"model":"CELLama","paper":{"type":"preprint","text":"[Choi et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16)","url":"https://www.biorxiv.org/content/10.1101/2024.05.08.593094v1.full#ref-16"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/portrai-io/CELLama)","url":"https://github.com/portrai-io/CELLama"},"omic_modalities":"scRNA-seq, Spatial transcriptomics","pre_training_dataset":"Natural Language [SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","input_embedding":"Other: Ordering with embedding of the natural language representation, additional cell annotations are added in natural language","architecture":"[SBERT](https://fq.pkwyx.com/default/https/aclanthology.org/D19-1410.pdf)","ssl_tasks":"Contrastive loss","supervised_tasks":"Cell type annotation","zero_shot_tasks":"Cell type annotation, niche cell type featuring"},{"model":"CellWhisperer","paper":{"type":"preprint","text":"[Schaefer et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1)","url":"https://www.biorxiv.org/content/10.1101/2024.10.15.618501v1"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/epigen/cellwhisperer)","url":"https://github.com/epigen/cellwhisperer"},"omic_modalities":"Bulk/scRNA-seq","pre_training_dataset":"Transcriptome data paired with natural language annotations","input_embedding":"Geneformer- and BioBERT-based embedding models (contrastively fine-tuned)","architecture":"Multimodal contrastive training of embedding models (CLIP) and transcriptome instruction fine-tuning of LLM (LLaVA)","ssl_tasks":"-","supervised_tasks":"Transcriptome-aware question-answering","zero_shot_tasks":"Reference-free cell property prediction (cell types & states, disease states, organ of cell origin, ...)"},{"model":"scInterpreter","paper":{"type":"preprint","text":"[Li et al. 
2024](https://arxiv.org/abs/2402.12405)","url":"https://arxiv.org/abs/2402.12405"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [GPT-3.5](https://api.openai.com/) and [Llama-13b](https://arxiv.org/abs/2302.13971)","input_embedding":"Other: Ordering with embedding of the natural language representation","architecture":"[GPT-3.5](https://api.openai.com/)","ssl_tasks":"NTP with CE loss and instruction finetuning (GPT-3.5 closed-source)","supervised_tasks":"-","zero_shot_tasks":"Cell type annotation (LLMs frozen, only small MLP trained)"},{"model":"ChatCell","paper":{"type":"preprint","text":"[Fang et al. 2024](https://arxiv.org/abs/2402.08303)","url":"https://arxiv.org/abs/2402.08303"},"code":{"type":"reproducible","text":"[ð GitHub](https://github.com/zjunlp/ChatCell)","url":"https://github.com/zjunlp/ChatCell"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language [T5](https://huggingface.co/docs/transformers/en/model_doc/t5) and [natural language instructions](https://huggingface.co/datasets/zjunlp/ChatCell-Instructions)","input_embedding":"Other: Ordering with embedding as natural language with additional terms","architecture":"[T5](https://huggingface.co/docs/transformers/en/model_doc/t5)","ssl_tasks":"NTP with CE loss","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Simulation, cell type annotation, drug sensitivity prediction"},{"model":"MarkerGeneBERT","paper":{"type":"preprint","text":"[Cheng et al. 
2023](https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.30.578115v1"},"code":{"type":"none","text":"None","url":""},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and [PubMed Central](https://www.ncbi.nlm.nih.gov/pmc/)","input_embedding":"Other: Natural language preprocessed with [SciBERT](https://arxiv.org/abs/1903.10676)","architecture":"Encoder","ssl_tasks":"MLM","supervised_tasks":"Named Entity Recognition (NER), cell-biomarker sentence classification","zero_shot_tasks":"-"},{"model":"scELMo","paper":{"type":"preprint","text":"[Liu, Chen and Zheng 2023](https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2023.12.07.569910v1.full.pdf"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/HelloWorldLTY/scELMo)","url":"https://github.com/HelloWorldLTY/scELMo"},"omic_modalities":"scRNA-seq, CITE-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Other: NLP model embeddings of features weighted by the feature level in a cell (e.g. 
expression level)","architecture":"Closed source (some open)","ssl_tasks":"Closed source (some open)","supervised_tasks":"Cell type annotation, Genetic perturbation effect prediction","zero_shot_tasks":"Cell and gene embeddings in other perturbation models"},{"model":"GenePT","paper":{"type":"preprint","text":"[Chen and Zou 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.562533v1.full"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/yiqunchen/GenePT)","url":"https://github.com/yiqunchen/GenePT"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"Gene function prediction","zero_shot_tasks":"Cell clustering, GRN inference"},{"model":"GPT-4","paper":{"type":"peer_reviewed","text":"[W. Hou and Z. Ji 2024](https://www.nature.com/articles/s41592-024-02235-4)","url":"https://www.nature.com/articles/s41592-024-02235-4"},"code":{"type":"evaluation_only","text":"[ðGitHub](https://github.com/Winnie09/GPTCelltype)","url":"https://github.com/Winnie09/GPTCelltype"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language, Closed source","input_embedding":"Ordering: embedding as natural language","architecture":"Closed source","ssl_tasks":"Closed source","supervised_tasks":"None (conditional sequence generation, prompting)","zero_shot_tasks":"Cell type annotation"},{"model":"Cell2Sentence","paper":{"type":"peer_reviewed","text":"[Levine et al. 
2024](https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1)","url":"https://openreview.net/forum?id=EWt5wsEdvc&referrer=%5Bthe%20profile%20of%20Josue%20Ortega%20Caro%5D(%2Fprofile%3Fid%3D~Josue_Ortega_Caro1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/vandijklab/cell2sentence-ft)","url":"https://github.com/vandijklab/cell2sentence-ft"},"omic_modalities":"scRNA-seq","pre_training_dataset":"Natural Language ([GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)) and [scRNA-seq](https://www.science.org/doi/full/10.1126/science.abl5197?casa_token=KSZInYXxqU4AAAAA%3AuNgeqoX4vxOaMPGAv4UW9_GMy1lMmZ1-QGyx2VBCSbsGWvchKCzdNUvwt-h_yemzugH075TGz6N8fw) (40k / immune, human)","input_embedding":"Ordering: embedding as natural language","architecture":"Decoder","ssl_tasks":"NTP with CE loss","supervised_tasks":"-","zero_shot_tasks":"Simulation, cell type annotation"}];
-        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 
2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."}];
+        window.transformerEvaluation = [{"paper":{"type":"preprint","text":"[Kaspar Märtens et al. 2024](https://openreview.net/pdf?id=eb3ndUlkt4)","url":"https://openreview.net/pdf?id=eb3ndUlkt4"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Ihab Bendidi et al. 2024](https://arxiv.org/pdf/2410.13956)","url":"https://arxiv.org/pdf/2410.13956"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/valence-labs/Tx-Evaluation)","url":"https://github.com/valence-labs/Tx-Evaluation"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[George Crowley et al. 2024](https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf)","url":"https://www.biorxiv.org/content/10.1101/2024.10.10.617605v1.full.pdf"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ggit12/anndictionary/)","url":"https://github.com/ggit12/anndictionary/"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yan Wu et al. 2024](https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5)","url":"https://scholar.google.com/scholar?cluster=18315006149844520972&hl=en&as_sdt=0,5"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/altoslabs/perturbench)","url":"https://github.com/altoslabs/perturbench"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[A. Wenteler et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract)","url":"https://www.biorxiv.org/content/10.1101/2024.10.02.616248v1.abstract"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/aaronwtr/PertEval)","url":"https://github.com/aaronwtr/PertEval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"UCE, scBERT, scGPT, Geneformer, scFoundation","tasks":"Transcriptomic perturbation prediction","notes":"Introduces PertEval-scFM, a benchmark to assess the zero-shot utility of single-cell foundation model embeddings for transcriptomic perturbation prediction. Uses SPECTRA to generate train-test splits with increasing dissimilarity to evaluate robustness against distribution shift. Models are evaluated with MSE and AUSPC, with AUSPC reflecting robustness under distribution shift. Additional analyses include E-distance and predicted transcriptomic distributions across the top 20 DEGs. Findings suggest that single-cell foundation model embeddings capture average perturbation effects but generally lack robustness to distribution shift. Ongoing work demonstrates that the domain-specific model GEARS outperforms foundation model embeddings, indicating that masked-language modeling on gene expression data without domain-specific inductive biases is insufficient for accurate transcriptomic perturbation prediction."},{"paper":{"type":"preprint","text":"[Eric Kernfeld et al. 2024](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full)","url":"https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2.full"},"code":{"type":"reproducible","text":"[ð\\x9F\\x9B\\_ï¸\\x8FGitHub](https://github.com/ekernf01/perturbation_benchmarking)","url":"https://github.com/ekernf01/perturbation_benchmarking"},"omic_modalities":"-","evaluated_transformers":"-","tasks":"-","notes":"-"},{"paper":{"type":"preprint","text":"[Yoav Kan-Tor et al. 
2024](https://arxiv.org/abs/2412.04075)","url":"https://arxiv.org/abs/2412.04075"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/BiomedSciAI/gene-benchmark)","url":"https://github.com/BiomedSciAI/gene-benchmark"},"omic_modalities":"Natural Language, scRNAseq, protein sequence, DNA sequence","evaluated_transformers":"Mistral, MPnet, CellPLM, GeneFormer, scGPT, DNABert2, ESM (plus bag-of-words, gene2vec)","tasks":"genomic properties, gene regulatory functions, gene localization, gene biological processes, protein properties","notes":"Evaluating model performance using gene embeddings allows comparing models utilizing different data modalities. The performance profile of a diverse set of models across ~300 gene related tasks was shown to be most influenced by the training modality."},{"paper":{"type":"preprint","text":"[Ahlmann-Eltze et al. 2024](https://www.biorxiv.org/content/10.1101/2024.09.16.613342)","url":"https://www.biorxiv.org/content/10.1101/2024.09.16.613342"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/const-ae/linear_perturbation_prediction-Paper/)","url":"https://github.com/const-ae/linear_perturbation_prediction-Paper/"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, scFoundation","tasks":"Genetic perturbation effect prediction","notes":"A simple linear model performs better than scGPT and scFoundation (and GEARS)."},{"paper":{"type":"preprint","text":"[He et al. 2024](https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1)","url":"https://www.biorxiv.org/content/10.1101/2024.01.27.577455v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/laolintou/scPEFT)","url":"https://github.com/laolintou/scPEFT"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Cell type annotation","notes":"Evaluation of Parameter-Efficient Fine-Tuning (PEFT) for scGPT. 
Indicates that PEFT not only is more compute-efficient, but also results in better cell type prediction."},{"paper":{"type":"peer_reviewed","text":"[Khan et al. 2023](https://www.nature.com/articles/s42256-023-00757-8)","url":"https://www.nature.com/articles/s42256-023-00757-8"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/TranslationalBioinformaticsUnit/scbert-reusability)","url":"https://github.com/TranslationalBioinformaticsUnit/scbert-reusability"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT","tasks":"Cell type annotation. Unseen cell type detection","notes":"Focused on imbalanced cell type classification. scBERT is sensitive to class imbalance. scBERT outperforms Seurat. scBERT doesn't perform well in unseen cell type detection. It benefits from SSL pretraining."},{"paper":{"type":"preprint","text":"[Liu et al. 2023](https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4)","url":"https://www.biorxiv.org/content/10.1101/2023.09.08.555192v4"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/HelloWorldLTY/scEval)","url":"https://github.com/HelloWorldLTY/scEval"},"omic_modalities":"scRNA-seq, scATAC-seq, Spatial transcriptomics","evaluated_transformers":"scGPT, Geneformer, scBERT, tGPT, CellLM","tasks":"Cell clustering, cell type annotation, multimodal embedding, GRN inference, gene expression imputation, genetic perturbation effect prediction, simulation, gene function prediction","notes":"Models aren't trained on the same datasets. scGPT is positioned as most versatile in terms of task diversity that it can tackle. Models other than transformer appear to be at least as good as transformers in most tasks. Transformers were shown to be sensitive to the choice of hyperparameters, such as learning rate and epochs."},{"paper":{"type":"preprint","text":"[Boiarsky et al. 
2023](https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.19.563100v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/clinicalml/sc-foundation-eval)","url":"https://github.com/clinicalml/sc-foundation-eval"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scBERT, scGPT","tasks":"Cell type annotation","notes":"Logistic regression appears to be as good as transformers in cell type annotation, even in low-data scenarios."},{"paper":{"type":"preprint","text":"[Kedzierska et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2)","url":"https://www.biorxiv.org/content/10.1101/2023.10.16.561085v2"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/microsoft/zero-shot-scfoundation)","url":"https://github.com/microsoft/zero-shot-scfoundation"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer","tasks":"Cell clustering","notes":"Zero-shot performance only. Both models appear unreliable."},{"paper":{"type":"preprint","text":"[Alsabbagh et al. 2023](https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1)","url":"https://www.biorxiv.org/content/10.1101/2023.10.24.563625v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels)","url":"https://github.com/SabbaghCodes/ImbalancedLearningForSingleCellFoundationModels"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT, Geneformer, scBERT","tasks":"Cell type annotation","notes":"Focused on imbalanced cell type classification. Geneformer appears to be outperformed by scGPT and scBERT, where the two latter perform similarly."},{"paper":{"type":"preprint","text":"[Csendes et al. 
2024](https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1)","url":"https://www.biorxiv.org/content/10.1101/2024.09.30.615843v1"},"code":{"type":"reproducible","text":"[ð ï¸GitHub](https://github.com/turbine-ai/PerturbSeqPredBenchmark)","url":"https://github.com/turbine-ai/PerturbSeqPredBenchmark"},"omic_modalities":"scRNA-seq","evaluated_transformers":"scGPT","tasks":"Genetic perturbation effect prediction","notes":"Simple baseline models can outperform scGPT on perturbational downstream tasks. The most widely used benchmarking datasets contain significant biases, making them suboptimal for evaluation."}];
     </script>
 
     <!-- Then load your bundles -->