Skip to content

Commit 18d06c1

Browse files
Reader for Curio files (#41)
* Add initial reader * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update api.md * Fix pre-commit --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 4cb0a22 commit 18d06c1

File tree

4 files changed

+102
-0
lines changed

4 files changed

+102
-0
lines changed

docs/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ I/O for the `spatialdata` project.
1212
.. autosummary::
1313
:toctree: generated
1414
15+
curio
1516
cosmx
1617
visium
1718
xenium

src/spatialdata_io/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
from importlib.metadata import version
22

33
from spatialdata_io.readers.cosmx import cosmx
4+
from spatialdata_io.readers.curio import curio
45
from spatialdata_io.readers.mcmicro import mcmicro
56
from spatialdata_io.readers.steinbock import steinbock
67
from spatialdata_io.readers.visium import visium
78
from spatialdata_io.readers.xenium import xenium
89

910
__all__ = [
11+
"curio",
1012
"visium",
1113
"xenium",
1214
"cosmx",

src/spatialdata_io/_constants/_constants.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,21 @@
33
from spatialdata_io._constants._enum import ModeEnum
44

55

6+
@unique
class CurioKeys(ModeEnum):
    """File-name endings and metadata keys used when reading a *Curio* dataset."""

    # --- file-name endings -------------------------------------------------
    # The reader prepends ``<dataset_id>_`` to each of these to build the
    # name of the file it loads from the dataset directory.
    ANNDATA_FILE = ".h5ad"
    CLUSTER_ASSIGNMENT = "cluster_assignment.txt"
    METRICS_FILE = "Metrics.csv"
    VAR_FEATURES_CLUSTERS = "variable_features_clusters.txt"
    VAR_FEATURES_MORANSI = "variable_features_moransi.txt"

    # --- metadata keys -----------------------------------------------------
    # Column of the metrics table whose values group the metrics.
    CATEGORY = "Category"
    # Key under which the cluster-defining features table is stored in ``uns``.
    TOP_CLUSTER_DEFINING_FEATURES = "Top_cluster_defining_features"
19+
20+
621
@unique
722
class CosmxKeys(ModeEnum):
823
"""Keys for *Nanostring Cosmx* formatted dataset."""
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from __future__ import annotations
2+
3+
from pathlib import Path
4+
from typing import Optional
5+
6+
import anndata as ad
7+
import pandas as pd
8+
from spatialdata import SpatialData
9+
from spatialdata.models import TableModel
10+
11+
from spatialdata_io._constants._constants import CurioKeys
12+
from spatialdata_io._docs import inject_docs
13+
14+
__all__ = ["curio"]
15+
16+
17+
def _find_single_file(directory: Path, suffix: str) -> Path:
    """Return the only file in ``directory`` whose name ends with ``suffix``."""
    # Sorted so that the reported/selected file never depends on directory order.
    matches = sorted(directory.glob(f"*{suffix}"))
    if not matches:
        raise FileNotFoundError(f"No file ending with `{suffix}` found in `{directory}`.")
    if len(matches) > 1:
        raise ValueError(
            f"Found {len(matches)} files ending with `{suffix}` in `{directory}`; "
            "pass `dataset_id` to select a single dataset."
        )
    return matches[0]


@inject_docs(vx=CurioKeys)
def curio(
    path: str | Path,
    dataset_id: Optional[str] = None,
) -> SpatialData:
    """
    Read *Curio* formatted dataset.

    This function reads the following files:

        - ``<dataset_id>_`{vx.ANNDATA_FILE!r}```: Counts and metadata file.
        - ``<dataset_id>_`{vx.CLUSTER_ASSIGNMENT!r}```: Cluster assignment file.
        - ``<dataset_id>_`{vx.METRICS_FILE!r}```: Metrics file.
        - ``<dataset_id>_`{vx.VAR_FEATURES_CLUSTERS!r}```: Variable features clusters file.
        - ``<dataset_id>_`{vx.VAR_FEATURES_MORANSI!r}```: Variable features Moran's I file.

    Parameters
    ----------
    path
        Path to the directory containing the data.
    dataset_id
        Dataset identifier, i.e. the prefix shared by the file names listed above.
        If `None`, each file is looked up in ``path`` by its file-name ending and
        exactly one match per file is required.

    Returns
    -------
    :class:`spatialdata.SpatialData`

    Raises
    ------
    FileNotFoundError
        If ``dataset_id`` is `None` and one of the files cannot be found in ``path``.
    ValueError
        If ``dataset_id`` is `None` and more than one file matches the same ending.
    """
    path = Path(path)
    expected_files = [
        CurioKeys.ANNDATA_FILE,
        CurioKeys.CLUSTER_ASSIGNMENT,
        CurioKeys.METRICS_FILE,
        CurioKeys.VAR_FEATURES_CLUSTERS,
        CurioKeys.VAR_FEATURES_MORANSI,
    ]

    # Resolve every file to a full path up front so both branches yield the same kind of value.
    if dataset_id is not None:
        file_paths = [path / f"{dataset_id}_{file_name}" for file_name in expected_files]
    else:
        file_paths = [_find_single_file(path, file_name) for file_name in expected_files]
    anndata_path, cluster_path, metrics_path, features_clusters_path, features_moransi_path = file_paths

    adata = ad.read_h5ad(anndata_path)
    cluster_assign = pd.read_csv(cluster_path, sep="\t", header=None)
    # A literal comma separator keeps the fast C parser and honours quoted fields.
    metrics = pd.read_csv(metrics_path, sep=",", header=0)
    var_features_clusters = pd.read_csv(features_clusters_path, sep="\t", header=0)
    var_features_moransi = pd.read_csv(features_moransi_path, sep="\t", header=0)

    # The second column of the cluster file is attached positionally.
    # NOTE(review): assumes its rows are in the same order as `adata.obs` - confirm.
    adata.obs = adata.obs.assign(cluster=cluster_assign[1].values)

    # Store the metrics in `uns`, one mapping per category, built from the first two columns.
    categories = metrics[CurioKeys.CATEGORY].unique()
    for cat in categories:
        df = metrics.loc[metrics[CurioKeys.CATEGORY] == cat]
        adata.uns[cat] = dict(zip(df.iloc[:, 0], df.iloc[:, 1]))
    adata.uns[CurioKeys.TOP_CLUSTER_DEFINING_FEATURES] = var_features_clusters

    # `DataFrame.join` returns a new frame, so the result has to be assigned back.
    # A left join keeps exactly the rows of `adata.var`, which AnnData requires.
    # NOTE(review): assumes the Moran's I table is indexed by the same feature names as `adata.var` - confirm.
    adata.var = adata.var.join(var_features_moransi, how="left", rsuffix="_moransi")

    table = TableModel.parse(adata)

    return SpatialData(table=table)

0 commit comments

Comments
 (0)