Skip to content

Commit d0b5b42

Browse files
committed
Imro string parsing single source of truth
1 parent 2789811 commit d0b5b42

4 files changed

Lines changed: 2554 additions & 2394 deletions

File tree

.github/workflows/copy_probe_features.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ jobs:
1919
curl -o src/probeinterface/resources/neuropixels_probe_features.json \
2020
https://raw.githubusercontent.com/billkarsh/ProbeTable/refs/heads/main/Tables/probe_features.json
2121
22+
- name: Derive IMRO type mappings from catalogue
23+
run: python resources/postprocess_neuropixels_probe_features.py
24+
2225
- name: Commit changes if any
2326
id: commit
2427
run: |
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""
2+
Post-process neuropixels_probe_features.json after syncing from ProbeTable.
3+
4+
Derives two mappings from the catalogue and writes them back into the JSON:
5+
6+
- z_imro_format_type_to_imro_format: IMRO type code -> IMRO format name
7+
(e.g. "0" -> "imro_np1000", "1110" -> "imro_np1110")
8+
9+
- z_imro_format_type_to_part_number: IMRO type code -> canonical probe part number
10+
(e.g. "0" -> "NP1000", "1110" -> "NP1110")
11+
12+
This script is called by the GitHub Action workflow that syncs probe_features.json
13+
from billkarsh/ProbeTable, and can also be run standalone.
14+
"""
15+
16+
import json
17+
import re
18+
from pathlib import Path
19+
20+
# Default location of the catalogue JSON, expressed relative to this script's
# directory. The ".." segment is left unresolved here; postprocess() resolves it.
PROBE_FEATURES_PATH = Path(__file__).absolute().parent.joinpath(
    "../src/probeinterface/resources/neuropixels_probe_features.json"
)
24+
25+
26+
def _parse_type_values_from_val_def(val_def: str) -> list[str]:
27+
"""Extract IMRO type code(s) from a val_def string.
28+
29+
Two patterns in ProbeTable:
30+
type:{0,1020,1030,...} -> set of values
31+
type:1110 -> single value
32+
"""
33+
match = re.match(r"type:\{([^}]+)\}", val_def)
34+
if match:
35+
return [v.strip() for v in match.group(1).split(",")]
36+
37+
match = re.match(r"type:(\d+)", val_def)
38+
if match:
39+
return [match.group(1)]
40+
41+
raise ValueError(f"Cannot parse type from val_def: {val_def!r}")
42+
43+
44+
def build_derived_mappings(probe_features: dict) -> tuple[dict, dict]:
    """Build type-to-format and type-to-part-number mappings from the catalogue.

    Parameters
    ----------
    probe_features : dict
        Parsed catalogue. Two keys are read: "z_imro_formats" (only entries whose
        key ends in "_val_def" are used) and "neuropixels_probes" (part number ->
        spec, from which "imro_table_format_type" is read).

    Returns
    -------
    type_to_format : dict
        IMRO type code (str) -> IMRO format name, e.g. "0" -> "imro_np1000".
    type_to_part_number : dict
        IMRO type code (str) -> chosen part number. A type code is omitted when
        no probe in the catalogue uses its format.

    Raises
    ------
    ValueError
        If one IMRO type code is listed in the val_def of two different formats,
        or if a val_def cannot be parsed.
    """
    imro_formats = probe_features["z_imro_formats"]
    probes = probe_features["neuropixels_probes"]

    # 1. Build type -> format mapping from val_def entries
    type_to_format = {}
    for key, val_def in imro_formats.items():
        if not key.endswith("_val_def"):
            continue
        # e.g. "imro_np1000_val_def" -> "imro_np1000"
        format_name = key.removesuffix("_val_def")
        for type_code in _parse_type_values_from_val_def(val_def):
            if type_code in type_to_format:
                raise ValueError(
                    f"IMRO type {type_code!r} maps to both "
                    f"{type_to_format[type_code]!r} and {format_name!r}"
                )
            type_to_format[type_code] = format_name

    # 2. Build type -> canonical part number mapping
    # For each type, collect the probes that use the matching format and pick one.
    # Candidates are selected by format only: several types can share a format, so
    # every type of that format sees the same candidate list and the choice below
    # is what tells them apart.
    type_to_part_number = {}
    for type_code, format_name in sorted(type_to_format.items()):
        candidates = [
            pn
            for pn, spec in probes.items()
            if spec.get("imro_table_format_type") == format_name
        ]

        # Prefer a probe whose part number contains the type code (e.g. NP1020 for type "1020").
        # This matters because many probes share the same IMRO format but have different
        # physical geometries (e.g. NP1000 has 960 contacts, NP1020 has 2496).
        # NOTE(review): this is a substring test, so a short code such as "0" matches
        # most NP part numbers and the alphabetical order decides.
        exact_matches = sorted(
            pn for pn in candidates if pn.startswith("NP") and type_code in pn
        )
        if exact_matches:
            type_to_part_number[type_code] = exact_matches[0]
            continue

        # Fall back to the first NP-prefixed name alphabetically, then any other name
        np_candidates = sorted(pn for pn in candidates if pn.startswith("NP"))
        other_candidates = sorted(pn for pn in candidates if not pn.startswith("NP"))
        ordered = np_candidates + other_candidates

        if ordered:
            type_to_part_number[type_code] = ordered[0]

    return type_to_format, type_to_part_number
106+
107+
108+
def postprocess(filepath: Path = PROBE_FEATURES_PATH) -> None:
    """Add the derived IMRO mappings to the catalogue JSON file, in place.

    Reads the JSON at ``filepath``, stores the two mappings returned by
    ``build_derived_mappings`` under "z_imro_format_type_to_imro_format" and
    "z_imro_format_type_to_part_number" (both ordered by numeric type code),
    rewrites the file with a 4-space indent and a trailing newline, and prints
    a summary.

    Parameters
    ----------
    filepath : Path
        Location of the catalogue JSON. It is resolved before use.
    """
    filepath = filepath.resolve()
    # JSON is UTF-8 by specification; do not depend on the platform's locale encoding.
    with open(filepath, encoding="utf-8") as f:
        probe_features = json.load(f)

    type_to_format, type_to_part_number = build_derived_mappings(probe_features)

    def _ordered_by_numeric_type(mapping: dict) -> dict:
        # Type codes are digit strings; order them as integers ("21" before "1020").
        return dict(sorted(mapping.items(), key=lambda kv: int(kv[0])))

    probe_features["z_imro_format_type_to_imro_format"] = _ordered_by_numeric_type(type_to_format)
    probe_features["z_imro_format_type_to_part_number"] = _ordered_by_numeric_type(type_to_part_number)

    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(probe_features, f, indent=4)
        f.write("\n")

    print(f"Wrote derived mappings to {filepath}")
    print(f" z_imro_format_type_to_imro_format: {len(type_to_format)} entries")
    print(f" z_imro_format_type_to_part_number: {len(type_to_part_number)} entries")
    for type_code in sorted(type_to_format, key=int):
        # Types with no probe using their format have no part number; show a placeholder.
        pn = type_to_part_number.get(type_code, "???")
        print(f" type {type_code:>5s} -> format={type_to_format[type_code]}, part_number={pn}")
128+
129+
130+
if __name__ == "__main__":
131+
postprocess()

src/probeinterface/neuropixels_tools.py

Lines changed: 62 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -24,45 +24,23 @@
2424
# Utils zone #
2525
###############
2626

27-
# Map imDatPrb_pn (probe number) to imDatPrb_type (probe type) when the latter is missing
28-
# ONLY needed for `read_imro` function
29-
probe_part_number_to_probe_type = {
30-
# for old version without a probe number we assume NP1.0
31-
None: "0",
32-
# NP1.0
33-
"PRB_1_4_0480_1": "0",
34-
"PRB_1_4_0480_1_C": "0", # This is the metal cap version
35-
"PRB_1_2_0480_2": "0",
36-
"NP1010": "0",
37-
# NHP probes lin
38-
"NP1015": "1015",
39-
"NP1016": "1015",
40-
"NP1017": "1015",
41-
# NHP probes stag med
42-
"NP1020": "1020",
43-
"NP1021": "1021",
44-
"NP1022": "1022",
45-
# NHP probes stag long
46-
"NP1030": "1030",
47-
"NP1031": "1031",
48-
"NP1032": "1032",
49-
# NP2.0
50-
"NP2000": "21",
51-
"NP2010": "24",
52-
"NP2013": "2013",
53-
"NP2014": "2014",
54-
"NP2003": "2003",
55-
"NP2004": "2004",
56-
"PRB2_1_2_0640_0": "21",
57-
"PRB2_4_2_0640_0": "24",
58-
# NXT
59-
"NP2020": "2020",
60-
# Ultra
61-
"NP1100": "1100", # Ultra probe - 1 bank
62-
"NP1110": "1110", # Ultra probe - 16 banks no handle because
63-
"NP1121": "1121", # Ultra probe - beta configuration
64-
# Opto
65-
"NP1300": "1300", # Opto probe
27+
# IMRO type codes not listed in any val_def entry in the ProbeTable catalogue.
28+
# These probes all use the imro_np1000 or imro_np2003/imro_np2013 format, but their
29+
# type codes are not in the corresponding val_def type sets.
30+
# We don't know if SpikeGLX actually produces IMRO files with these type codes
31+
# (there is no test data for them). They are kept here for backwards compatibility.
32+
# Values are (imro_format_name, canonical_part_number).
33+
#
34+
# TODO: @team - Should these be added to ProbeTable's val_def, or can they be removed?
35+
# If SpikeGLX never produces these type codes, this dict can be deleted entirely.
36+
_imro_format_type_fallback = {
37+
"1015": ("imro_np1000", "NP1015"),
38+
"1021": ("imro_np1000", "NP1021"),
39+
"1022": ("imro_np1000", "NP1022"),
40+
"1031": ("imro_np1000", "NP1031"),
41+
"1032": ("imro_np1000", "NP1032"),
42+
"2004": ("imro_np2003", "NP2004"),
43+
"2014": ("imro_np2013", "NP2014"),
6644
}
6745

6846
# Map from imro format to ProbeInterface naming conventions
@@ -439,24 +417,20 @@ def _annotate_probe_with_adc_sampling_info(probe: Probe, adc_sampling_table: str
439417
#########################
440418

441419

442-
def _parse_imro_string(imro_table_string: str, probe_part_number: str) -> dict:
420+
def _parse_imro_string(imro_table_string: str) -> dict:
443421
"""
444422
Parse IMRO (Imec ReadOut) table string into structured per-channel data.
445423
446424
IMRO format: "(probe_type,num_chans)(ch0 bank0 ref0 ...)(ch1 bank1 ref1 ...)..."
447425
Example: "(0,384)(0 1 0 500 250 1)(1 0 0 500 250 1)..."
448426
449-
Note: The IMRO header contains a probe_type field (e.g., "0", "21", "24"), which is
450-
a numeric format version identifier that specifies which IMRO table structure was used.
451-
Different probe generations use different IMRO formats. This is a file format detail,
452-
not a physical probe property.
427+
The IMRO type is extracted from the header and used to look up the field schema
428+
from the catalogue (z_imro_format_type_to_imro_format). No probe part number is needed.
453429
454430
Parameters
455431
----------
456432
imro_table_string : str
457433
IMRO table string from SpikeGLX metadata file
458-
probe_part_number : str
459-
Probe part number (e.g., "NP1000", "NP2000")
460434
461435
Returns
462436
-------
@@ -473,22 +447,41 @@ def _parse_imro_string(imro_table_string: str, probe_part_number: str) -> dict:
473447
Example for NP1110: {"header": {"type": 1110, "col_mode": 2, "ref_id": 0, ...},
474448
"group": [0,1,...], "bankA": [0,0,...], "bankB": [0,0,...]} # 24 entries, not 384
475449
"""
476-
# Get IMRO field format from catalogue
450+
# Parse IMRO header and per-entry values
451+
header_str, *imro_table_values_list, _ = imro_table_string.strip().split(")")
452+
header_values = tuple(map(int, header_str[1:].split(",")))
453+
454+
# Extract IMRO type from header. Phase3A probes have a 3-field header; all others
455+
# have 2+ fields with type as the first. Phase3A is treated as type 0.
456+
if len(header_values) == 3:
457+
imro_format_type = "0"
458+
else:
459+
imro_format_type = str(header_values[0])
460+
461+
# Look up the IMRO format schema from the catalogue's derived mappings
477462
probe_features = _load_np_probe_features()
478-
probe_spec = probe_features["neuropixels_probes"][probe_part_number]
479-
imro_format = probe_spec["imro_table_format_type"]
463+
type_to_format = probe_features["z_imro_format_type_to_imro_format"]
464+
465+
if imro_format_type in type_to_format:
466+
imro_format = type_to_format[imro_format_type]
467+
elif imro_format_type in _imro_format_type_fallback:
468+
imro_format = _imro_format_type_fallback[imro_format_type][0]
469+
else:
470+
valid_types = sorted(set(type_to_format) | set(_imro_format_type_fallback), key=int)
471+
raise ValueError(f"Unknown IMRO type '{imro_format_type}'. Valid types: {valid_types}")
472+
480473
imro_fields_string = probe_features["z_imro_formats"][imro_format + "_elm_flds"]
481474
imro_fields = tuple(imro_fields_string.replace("(", "").replace(")", "").split(" "))
482475

483-
# Parse IMRO header and per-entry values
484-
header_str, *imro_table_values_list, _ = imro_table_string.strip().split(")")
485-
486476
# Parse header fields using the catalogue schema
487477
imro_header_fields_string = probe_features["z_imro_formats"][imro_format + "_hdr_flds"]
488478
imro_header_fields = tuple(imro_header_fields_string.replace("(", "").replace(")", "").split(","))
489-
header_values = tuple(map(int, header_str[1:].split(",")))
490-
# Initialize with parsed header and empty lists for per-entry fields (filled below)
479+
# Initialize with parsed header and empty lists for per-entry fields (filled below).
480+
# For Phase3A (3-field header), zip silently drops the extra value, which is correct.
491481
imro_per_channel = {"header": dict(zip(imro_header_fields, header_values))}
482+
# Normalize Phase3A header type to 0 so downstream code reads it consistently
483+
if len(header_values) == 3:
484+
imro_per_channel["header"]["type"] = 0
492485
for field in imro_fields:
493486
imro_per_channel[field] = []
494487
for field_values_str in imro_table_values_list:
@@ -716,34 +709,27 @@ def read_imro(file_path: str | Path) -> Probe:
716709
https://billkarsh.github.io/SpikeGLX/help/imroTables/
717710
718711
"""
719-
# ===== 1. Read file and determine probe part number from IMRO header =====
712+
# ===== 1. Read file =====
720713
meta_file = Path(file_path)
721714
assert meta_file.suffix == ".imro", "'file' should point to the .imro file"
722715
with meta_file.open(mode="r") as f:
723716
imro_str = str(f.read())
724717

725-
imro_table_header_str, *imro_table_values_list, _ = imro_str.strip().split(")")
726-
imro_table_header = tuple(map(int, imro_table_header_str[1:].split(",")))
718+
# ===== 2. Parse IMRO table (type is extracted from the header automatically) =====
719+
imro_per_channel = _parse_imro_string(imro_str)
727720

728-
if len(imro_table_header) == 3:
729-
# In older versions of neuropixel arrays (phase 3A), imro tables were structured differently.
730-
# We use probe_type "0", which maps to probe_part_number NP1010 as a proxy for Phase3a.
731-
imDatPrb_type = "0"
732-
elif len(imro_table_header) == 2:
733-
imDatPrb_type, _ = imro_table_header
721+
# ===== 3. Resolve probe part number and build full probe =====
722+
imro_format_type = str(imro_per_channel["header"]["type"])
723+
probe_features = _load_np_probe_features()
724+
type_to_pn = probe_features["z_imro_format_type_to_part_number"]
725+
if imro_format_type in type_to_pn:
726+
probe_part_number = type_to_pn[imro_format_type]
727+
elif imro_format_type in _imro_format_type_fallback:
728+
probe_part_number = _imro_format_type_fallback[imro_format_type][1]
734729
else:
735-
raise ValueError(f"read_imro error, the header has a strange length: {imro_table_header}")
736-
imDatPrb_type = str(imDatPrb_type)
737-
738-
for probe_part_number, probe_type in probe_part_number_to_probe_type.items():
739-
if imDatPrb_type == probe_type:
740-
imDatPrb_pn = probe_part_number
741-
742-
# ===== 2. Interpret IMRO table =====
743-
imro_per_channel = _parse_imro_string(imro_str, imDatPrb_pn)
744-
745-
# ===== 3. Build full probe with all possible contacts =====
746-
full_probe = build_neuropixels_probe(probe_part_number=imDatPrb_pn)
730+
valid_types = sorted(set(type_to_pn) | set(_imro_format_type_fallback), key=int)
731+
raise ValueError(f"Unknown IMRO type '{imro_format_type}'. Valid types: {valid_types}")
732+
full_probe = build_neuropixels_probe(probe_part_number=probe_part_number)
747733

748734
# ===== 4. Slice full probe to active electrodes =====
749735
active_contact_ids = _get_imro_active_contact_ids(imro_per_channel)
@@ -820,7 +806,7 @@ def read_spikeglx(file: str | Path) -> Probe:
820806
# Specifies which electrodes were selected for recording (e.g., 384 of 960) plus their
821807
# acquisition settings (gains, references, filters). See: https://billkarsh.github.io/SpikeGLX/help/imroTables/
822808
imro_table_string = meta["imroTbl"]
823-
imro_per_channel = _parse_imro_string(imro_table_string, imDatPrb_pn)
809+
imro_per_channel = _parse_imro_string(imro_table_string)
824810

825811
# ===== 4. Slice full probe to active electrodes =====
826812
active_contact_ids = _get_imro_active_contact_ids(imro_per_channel)

0 commit comments

Comments
 (0)