Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ jobs:
- name: Check out repository code
uses: actions/checkout@v4

- name: Set up Python 3.10
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: '3.11'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install -e ".[dev]"

- name: Run tests with pytest
run: pytest
run: python -m pytest -q
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ cd spout_mouse
pip install .
```

## CLI

```bash
fp-analysis --help
```

## Usage

### Processing Lick Data
Expand Down Expand Up @@ -55,8 +61,8 @@ with open('path_to_credentials.json') as f:
client = authorize_google_sheets(credentials_json)

# Fetch experiment data
experiment_records = get_experiment_data(client)
experiment_df = pd.DataFrame(experiment_records)
google_sheet_url = "https://docs.google.com/spreadsheets/d/your-sheet-id"
experiment_df = get_experiment_data(client, google_sheet_url)

# Load and process spout metadata
spout_names = load_experiment_metadata(
Expand Down Expand Up @@ -97,7 +103,6 @@ from spout_mouse import (
calculate_mean_zscore,
prepare_long_format,
calculate_mean_sem_zscores,
plot_zscore_traces, # Assuming this function exists for plotting
)

# Define the directory pattern for the data blocks
Expand Down Expand Up @@ -128,9 +133,6 @@ mean_zscore_long = prepare_long_format(mean_zscore_df, across_days=False)
# Calculate mean and SEM of z-scores for plotting
mean_sem_zscores = calculate_mean_sem_zscores(mean_zscore_df, across_days=False)

# Generate plots (assuming you have a plotting function)
plot_zscore_traces(mean_sem_zscores)

# Save the processed data
mean_sem_zscores.to_csv('mean_sem_zscores.csv', index=False)
```
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,3 @@ gspread>=3.6
google-auth>=1.11
pingouin>=0.3.8
tdt>=0.6.6
pytest-mock>=3.0.0
7 changes: 3 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ classifiers =
Topic :: Scientific/Engineering :: Bio-Informatics
License :: OSI Approved :: MIT License
Programming Language :: Python :: 3
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10
Programming Language :: Python :: 3.11
Operating System :: OS Independent
keywords = neuroscience, fiber photometry, data analysis, biology

[options]
packages = find:
python_requires = >=3.7
python_requires = >=3.9
install_requires =
pandas>=1.0
numpy>=1.18
Expand All @@ -41,7 +41,6 @@ install_requires =
google-auth>=1.11
pingouin>=0.3.8
tdt>=0.6.6
pytest-mock>=3.0.0
include_package_data = True

[options.extras_require]
Expand Down
18 changes: 0 additions & 18 deletions spout_mouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,30 +91,12 @@
'BASELINE_END',
'SEC_TO_DROP_START',
'SEC_TO_DROP_END',
'GOOGLE_SHEET_URL',
'MOUSE_GROUPS',
'downsample_stream',
'double_exponential',
'get_bounds',
'estimate_amplitude',
'estimate_time_constant',
'get_initial_params',
'detrend_signal',
'build_traces_df',
'build_spout_df',
'calculate_zscores',
'add_auc',
'prepare_fp_dataframe',
'clean_fp_trials',
'truncate_zscore_arrays',
'merge_fp_with_lick_data',
'calculate_mean_zscore',
'prepare_long_format',
'calculate_mean_sem_zscores',
'nape_cart_processing',
'is_first_mouse',
'is_second_mouse',
'process_mouse',
'process_block_path',
'process_all_blocks',
]
4 changes: 3 additions & 1 deletion spout_mouse/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def aggregate_data_and_calculate_sem(lick_data_spout: pd.DataFrame, combine_days
groups += ["day"]

def sem_func(arr):
    """
    Return the standard error of the mean over all values in ``arr``.

    Inputs holding zero or one element yield NaN instead of being passed
    to ``stats.sem``. Otherwise the SEM is computed over the flattened
    input (``axis=None``) with ``ddof=0``.
    """
    has_multiple_values = len(arr) > 1
    return stats.sem(arr, axis=None, ddof=0) if has_multiple_values else np.nan

lick_data_grouped = lick_data_spout.groupby(groups).agg(
Expand Down Expand Up @@ -187,7 +189,7 @@ def prepare_fp_dataframe(fp_df: pd.DataFrame, excluded_mice: list[str] = None) -
pd.DataFrame: The prepared DataFrame.
"""
# Convert mouse_id from int to str
fp_df["mouse_id"] = fp_df["mouse_id"].astype(str)
fp_df["mouse_id"] = fp_df["mouse_id"].astype(str).astype(object)

# Map mouse_id to group
fp_df["group"] = fp_df["mouse_id"].map(MOUSE_GROUPS)
Expand Down
34 changes: 34 additions & 0 deletions spout_mouse/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
"""Command-line interface for spout_mouse."""

import argparse
import sys
from typing import Optional, Sequence


def build_parser() -> argparse.ArgumentParser:
    """
    Create the argument parser for the ``fp-analysis`` command.

    No options are registered here; the parser only carries the program
    name and a short description.

    Returns:
        argparse.ArgumentParser: The configured top-level parser.
    """
    summary = (
        "spout_mouse command-line entrypoint. "
        "Use this package from Python for full analysis workflows."
    )
    parser = argparse.ArgumentParser(prog="fp-analysis", description=summary)
    return parser


def main(argv: Optional[Sequence[str]] = None) -> int:
    """
    Entry point for the ``fp-analysis`` command.

    Args:
        argv: Command-line arguments without the program name. When None,
            ``sys.argv[1:]`` is used.

    Returns:
        int: Always 0 when the function returns normally.

    An empty argument list prints the help text and returns. Otherwise the
    arguments are handed to the parser purely for validation; the parsed
    result is discarded, and the parser itself may exit the process
    (for example on an unrecognised flag).
    """
    cli_args = sys.argv[1:] if argv is None else argv
    parser = build_parser()

    if cli_args:
        # Only validation happens here: the namespace is intentionally unused.
        parser.parse_args(cli_args)
    else:
        parser.print_help()

    return 0
36 changes: 28 additions & 8 deletions spout_mouse/data_loading.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import numpy as np
import pandas as pd
import os
from typing import List
from tdt import read_block
import re
from .config import (
DOWNSAMPLE_RATE,
SEC_TO_DROP_START,
Expand Down Expand Up @@ -60,14 +59,35 @@ def build_spout_df(timestamps: np.ndarray, block_path: str, mouse_id: str) -> pd
'mouse_id': mouse_id
})

# Extract cohort and day from block_path
parts = block_path.split(os.sep)
cohort_part = parts[3]
cohort = int(cohort_part.split()[1])
day_part = parts[2]
day = int(day_part.split()[1])
day, cohort = extract_day_and_cohort_from_path(block_path)

spout_ext_df['cohort'] = cohort
spout_ext_df['day'] = day

return spout_ext_df


def extract_day_and_cohort_from_path(path: str) -> tuple[int, int]:
    """
    Read the day and cohort numbers out of a filesystem path.

    Every segment of the normalised path is checked (after stripping
    surrounding whitespace) against the case-insensitive forms
    "day <int>" and "cohort <int>". Segments may sit at any depth and in
    any order. If a label occurs more than once, the first occurrence is
    the one returned.

    Args:
        path: Path containing a "day <int>" and a "cohort <int>" segment.

    Returns:
        tuple[int, int]: ``(day, cohort)``.

    Raises:
        ValueError: If the day, the cohort, or both cannot be found.
    """
    segment_re = re.compile(r"^(day|cohort)\s+(\d+)$", re.IGNORECASE)
    found: dict[str, int] = {}

    segments = os.path.normpath(path).split(os.sep)
    for segment in segments:
        hit = segment_re.match(segment.strip())
        if hit is not None:
            label = hit.group(1).lower()
            # setdefault keeps the first value seen for each label.
            found.setdefault(label, int(hit.group(2)))

    absent = [label for label in ("day", "cohort") if label not in found]
    if absent:
        raise ValueError(f"Could not parse {', '.join(absent)} from path: {path}")

    return found["day"], found["cohort"]
34 changes: 29 additions & 5 deletions spout_mouse/data_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
LICK_DATA_COLS,
MOUSE_GROUPS
)
from .data_loading import extract_day_and_cohort_from_path


def extract_zip_files(zip_file_paths: List[str], extract_to: str) -> None:
Expand Down Expand Up @@ -106,9 +107,10 @@ def process_lick_data(data_directory: str, mouse_ids_to_remove: List[str] = None
.ffill()

lick_data = lick_data.apply(pd.to_numeric)
lick_data["mouse_id"] = os.path.basename(file_path).split("_")[3].split(".")[0]
lick_data["cohort"] = int(file_path.split("/")[3].split()[1])
lick_data["day"] = int(file_path.split("/")[2].split()[1])
lick_data["mouse_id"] = _extract_mouse_id_from_filename(file_path)
day, cohort = extract_day_and_cohort_from_path(file_path)
lick_data["cohort"] = cohort
lick_data["day"] = day
lick_data = lick_data.loc[lick_data["time_ms"] > 0]
lick_data = lick_data[lick_data["event_tag"].isin(LICK_CODES + [SPOUT_EXT_CODE])]

Expand All @@ -130,11 +132,33 @@ def process_lick_data(data_directory: str, mouse_ids_to_remove: List[str] = None
lick_data_all = pd.concat(lick_data_list, ignore_index=True)

if mouse_ids_to_remove:
lick_data_all = lick_data_all[lick_data_all['mouse_id'].isin(mouse_ids_to_remove)]
lick_data_all = lick_data_all[~lick_data_all['mouse_id'].isin(mouse_ids_to_remove)]

return lick_data_all


def _extract_mouse_id_from_filename(file_path: str) -> str:
    """
    Pull the mouse ID out of a lick-data file name.

    The base name of ``file_path`` is split on underscores. The fourth
    field, cut at its first ".", is taken as the mouse ID.

    Args:
        file_path: Path to the file; only the base name is inspected.

    Returns:
        str: The mouse ID.

    Raises:
        ValueError: If the file name has fewer than four
            underscore-separated fields, or the resulting ID is empty.
    """
    filename = os.path.basename(file_path)
    fields = filename.split("_")

    if len(fields) >= 4:
        # Drop the extension (and anything after the first dot).
        candidate = fields[3].partition(".")[0]
        if candidate:
            return candidate
        raise ValueError(f"Could not parse mouse_id from filename '{filename}'.")

    raise ValueError(
        "Could not parse mouse_id from filename "
        f"'{filename}'. Expected at least 4 underscore-separated parts."
    )


def compute_spout_order(lick_data: pd.DataFrame) -> pd.DataFrame:
"""
Computes the 'spout_order' DataFrame by grouping 'lick_data' by 'cohort', 'day', and 'trial_num',
Expand Down Expand Up @@ -235,5 +259,5 @@ def merge_spout_info(lick_rate: pd.DataFrame, spout_names: pd.DataFrame) -> pd.D
pd.DataFrame: Merged DataFrame with spout and group information.
"""
merged_data = lick_rate.merge(spout_names, on=['cohort', 'day', 'spout_id'], how='left')
merged_data['group'] = merged_data['mouse_id'].map(MOUSE_GROUPS)
merged_data['group'] = merged_data['mouse_id'].map(MOUSE_GROUPS).astype(object)
return merged_data
26 changes: 26 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from unittest.mock import patch

import pytest


class _Mocker:
    """
    Small helper that starts ``unittest.mock.patch`` patchers and undoes
    them in bulk.

    NOTE(review): presumably a lightweight replacement for the
    ``pytest-mock`` ``mocker`` object; only ``patch`` and ``stopall`` are
    provided.
    """

    def __init__(self):
        # Patchers created through this helper, in creation order.
        self._patchers = []

    def patch(self, target, *args, **kwargs):
        """Start a patch on ``target`` and return whatever ``start()`` returns."""
        new_patcher = patch(target, *args, **kwargs)
        self._patchers.append(new_patcher)
        return new_patcher.start()

    def stopall(self):
        """Stop every recorded patcher, most recently created first."""
        pending = self._patchers
        while pending:
            latest = pending.pop()
            latest.stop()


@pytest.fixture
def mocker():
    """
    Provide a ``_Mocker`` to a test and stop all of its patches afterwards.

    The ``finally`` clause ensures ``stopall()`` runs even when the test
    body raises.
    """
    helper = _Mocker()
    try:
        yield helper
    finally:
        helper.stopall()
5 changes: 2 additions & 3 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pandas as pd
import numpy as np
from spout_mouse import analysis
from spout_mouse.config import DOWNSAMPLE_RATE, MOUSE_GROUPS
from spout_mouse.config import DOWNSAMPLE_RATE
from unittest.mock import patch


Expand Down Expand Up @@ -125,14 +125,13 @@ def test_add_auc(self):
self.assertEqual(len(result_df), 2)
self.assertIsInstance(result_df['auc'].iloc[0], float)

@patch('spout_mouse.config.MOUSE_GROUPS', {'1274': 'sgRosa26'})
def test_prepare_fp_dataframe(self):
excluded_mice = ["0037", "9694", "1228", "0036", "0039", "9692", "0061"]
prepared_df = analysis.prepare_fp_dataframe(self.fp_df, excluded_mice)
# Check that mouse_id is string
self.assertTrue(prepared_df['mouse_id'].dtype == object)
# Check that 'group' is mapped correctly
expected_groups = prepared_df['mouse_id'].map(MOUSE_GROUPS)
expected_groups = prepared_df['mouse_id'].map(analysis.MOUSE_GROUPS)
pd.testing.assert_series_equal(prepared_df['group'], expected_groups, check_names=False)
# Check that excluded mice are removed
self.assertFalse(prepared_df['mouse_id'].isin(excluded_mice).any())
Expand Down
19 changes: 19 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import pytest

from spout_mouse.cli import main


def test_main_no_args_prints_help(capsys):
    """With an empty argv, ``main`` returns 0 and writes usage text to stdout."""
    exit_code = main([])
    stdout = capsys.readouterr().out

    assert exit_code == 0
    for expected in ("usage:", "fp-analysis"):
        assert expected in stdout


def test_main_invalid_args_raises_system_exit():
    """An unrecognised flag makes the parser exit with status code 2."""
    unknown_flag = ["--not-a-real-flag"]

    with pytest.raises(SystemExit) as raised:
        main(unknown_flag)

    assert raised.value.code == 2
17 changes: 17 additions & 0 deletions tests/test_data_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,20 @@ def test_build_spout_df():
assert all(spout_df['cohort'] == 1)
assert all(spout_df['day'] == 2)


def test_build_spout_df_with_absolute_path():
    """Cohort and day are found when they sit deep inside an absolute path."""
    block_path = "/Users/daniel/data/experiments/cohort 4/raw/day 7/tanks/block-0000-0001"

    spout_df = build_spout_df(np.array([10.0, 20.0]), block_path, "0001")

    assert (spout_df["cohort"] == 4).all()
    assert (spout_df["day"] == 7).all()


def test_build_spout_df_missing_day_or_cohort_raises_value_error():
    """A path without day/cohort segments is rejected with a ValueError."""
    segments = ("path", "to", "data", "no_day_or_cohort", "block-0000-0001")
    bad_path = os.path.join(*segments)

    with pytest.raises(ValueError, match="Could not parse"):
        build_spout_df(np.array([10.0]), bad_path, "0001")
Loading