Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Final Edits, Monkeytype Run on all Files, RoughCal still in progress #6

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
57b3858
Finished monkeytype on noise_algorithms
jsh162 Aug 1, 2024
c397eb6
Created ini file and ran monkeytype on multifit,
jsh162 Aug 1, 2024
d6d9158
Finished misc.py and attempt with automated mypy test
jsh162 Aug 1, 2024
80b97fb
Ljh scripts completed with monkey and successful with mypy
jsh162 Aug 1, 2024
533152b
Update filters, monkeytype and mypy finished
jsh162 Aug 1, 2024
99f8c9e
Drift_correction finished with monkey type and mypy checked
jsh162 Aug 1, 2024
25b1163
Update python-app.yml
jsh162 Aug 12, 2024
1a88504
Merge branch 'main' into misc_typing
jsh162 Aug 12, 2024
17a48f7
Update to access directory
jsh162 Aug 12, 2024
775eec8
Merge branch 'misc_typing' of https://github.com/ggggggggg/moss into …
jsh162 Aug 12, 2024
d3e5236
Merge branch 'ljh_util+files_typing' into channels_typing
jsh162 Aug 12, 2024
f88287c
Altered misc
jsh162 Aug 12, 2024
f588e26
Merge branch 'channels_typing' into monkeyTypeCollection_typing
jsh162 Aug 12, 2024
fd951db
Update channels.py
jsh162 Aug 12, 2024
6666666
Merge branch 'channels_typing' into monkeyTypeCollection_typing
jsh162 Aug 12, 2024
deda1e0
Merge branch 'drift_correction_typing' into monkeyTypeCollection_typing
jsh162 Aug 12, 2024
5461500
Merge branch 'multi_fit_typing' into monkeyTypeCollection_typing
jsh162 Aug 12, 2024
b9fda58
Completed all files with mypy except for rough_cal and multifit
jsh162 Aug 13, 2024
2127af9
Test mypy again
jsh162 Aug 13, 2024
f7f2438
Test mypy utilizing follow-imports=silent feature
jsh162 Aug 13, 2024
6865928
Mypy completed and works, fixing pytest
jsh162 Aug 13, 2024
1ebb302
Fixing pytest to work with mypy
jsh162 Aug 13, 2024
1affec1
Merge branch 'misc_typing' into monkeyTypeCollection_typing
jsh162 Aug 13, 2024
62077f6
Updated Automated test to run on most files, fixed up multifit
jsh162 Aug 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions .github/workflows/python-app.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ jobs:
# flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
#- name: Test with mypy
# run: |
# mypy .
# # mypy path/to/specific_file.py

- name: Test with pytest
run: |
pytest
pytest
# call pytest twice since there is a bug that makes the examples fail if you call it only once https://github.com/marimo-team/marimo/issues/1888#issuecomment-2253432781

- name: Test with mypy
run: |
cd moss
mypy --follow-imports=silent misc.py ljhfiles.py noise_channel.py noise_algorithms.py multifit.py ljhutil.py filters.py drift_correction.py channels.py channel.py cal_steps.py




5 changes: 3 additions & 2 deletions moss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .misc import good_series
from .noise_algorithms import noise_psd, autocorrelation, NoisePSD
from .noise_channel import NoiseChannel
from .cal_steps import (CalSteps, CalStep, SummarizeStep)

from .multifit import FitSpec, MultiFit, MultiFitQuadraticGainCalStep, MultiFitMassCalibrationStep
from . import filters
from .filters import fourier_filter, Filter, Filter5LagStep
Expand All @@ -14,4 +14,5 @@
from .channel import Channel, ChannelHeader
from .channels import Channels
from .rough_cal import RoughCalibrationStep
from . import phase_correct
from . import phase_correct
from .cal_steps import (CalSteps, CalStep, SummarizeStep)
3 changes: 2 additions & 1 deletion moss/cal_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import typing
import numpy as np
import moss
from typing import Callable, List, Optional, Tuple,Union


@dataclass(frozen=True)
class CalStep:
inputs: list[str]
output: list[str]
good_expr: pl.Expr
good_expr: Union[pl.Expr,bool]
use_expr: pl.Expr


Expand Down
9 changes: 5 additions & 4 deletions moss/channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from moss import NoiseChannel, CalSteps, DriftCorrectStep, SummarizeStep, Filter5LagStep
from typing import Optional
import numpy as np
from typing import Union
import time
import mass

Expand Down Expand Up @@ -133,13 +134,13 @@ def rough_cal_combinatoric(
use_expr=use_expr)
return self.with_step(step)

def rough_cal(self, line_names: list[str | float],
def rough_cal(self, line_names: list[str | np.float64],
uncalibrated_col: str="filtValue",
calibrated_col: Optional[str]=None,
use_expr: bool | pl.Expr =True,
use_expr: Union[bool,pl.Expr] =True,
max_fractional_energy_error_3rd_assignment: float=0.1,
min_gain_fraction_at_ph_30k: float=0.25,
fwhm_pulse_height_units: float=75,
fwhm_pulse_height_units: int=75,
n_extra_peaks: int=10,
acceptable_rms_residual_e: float=10):
step = moss.RoughCalibrationStep.learn_3peak(self, line_names, uncalibrated_col, calibrated_col,
Expand Down Expand Up @@ -440,7 +441,7 @@ def multifit_quadratic_gain_cal(

def multifit_mass_cal(self, multifit: moss.MultiFit,
previous_cal_step_index, calibrated_col, use_expr=True):
step = moss.MultiFitMassCalibrationStep.learn(self, multifit_spec=multifit,
step = moss.MultiFitMassCalibrationStep.learn(self, multifit=multifit,
previous_cal_step_index=previous_cal_step_index,
calibrated_col=calibrated_col,
use_expr=use_expr)
Expand Down
61 changes: 35 additions & 26 deletions moss/channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
import mass
import moss
import joblib
from mass.calibration.line_models import LineModelResult
from pathlib import WindowsPath
from polars.dataframe.frame import DataFrame
from typing import Callable, List, Optional, Tuple

@dataclass(frozen=True)
class Channels:
Expand All @@ -20,33 +24,37 @@ def ch0(self):
return v

@functools.cache
def dfg(self, exclude: str="pulse") -> DataFrame:
    """Return one dataframe of the good pulses from every channel.

    For each channel the ``exclude`` column(s) are dropped, rows are filtered
    by ``channel.good_expr``, and a constant ``ch_num`` column is appended.
    The per-channel frames are then concatenated. The more common call should
    be to wrap this in a convenient plotter.

    Args:
        exclude: column selector passed to ``pl.exclude``; "pulse" by default.

    Returns:
        The concatenation of the filtered per-channel frames.
    """
    dfs: List[pl.DataFrame] = []
    for ch_num, channel in self.channels.items():
        # Keep the selection and the good-pulse filter: rebinding df straight
        # from channel.df would silently return every column and every row.
        df = channel.df.select(pl.exclude(exclude)).filter(channel.good_expr)
        if isinstance(df, pl.LazyFrame):
            # materialize so the row count below is known
            df = df.collect()
        assert ch_num == channel.header.ch_num
        ch_series = pl.Series("ch_num", dtype=pl.Int64).extend_constant(
            channel.header.ch_num, df.height
        )
        dfs.append(df.with_columns(ch_series))
    return pl.concat(dfs)

def linefit(
self,
line,
col,
use_expr=True,
has_linear_background=False,
has_tails=False,
dlo=50,
dhi=50,
binsize=0.5,
):
line: str,
col: str,
use_expr: bool=True,
has_linear_background: bool=False,
has_tails: bool=False,
dlo: int=50,
dhi: int=50,
binsize: float=0.5,
) -> LineModelResult:
model = mass.get_model(line, has_linear_background=False, has_tails=False)
pe = model.spect.peak_energy
_bin_edges = np.arange(pe - dlo, pe + dhi, binsize)
Expand All @@ -67,7 +75,7 @@ def linefit(
)
return result

def map(self, f, allow_throw=True):
def map(self, f: Callable, allow_throw: bool=True) -> "Channels":
new_channels = collections.OrderedDict()
for key, channel in self.channels.items():
try:
Expand All @@ -91,7 +99,7 @@ def work(key):
results = parallel(joblib.delayed(work)(key) for key in self.channels.keys())
return results

def __hash__(self):
def __hash__(self) -> int:
# needed to make functools.cache work
# if self or self.anything is mutated, assumptions will be broken
# and we may get nonsense results
Expand All @@ -101,7 +109,7 @@ def __eq__(self, other):
return id(self) == id(other)

@classmethod
def from_ljh_path_pairs(cls, pulse_noise_pairs, description):
def from_ljh_path_pairs(cls, pulse_noise_pairs: List[Tuple[str, str]], description: str) -> "Channels":
_channels = collections.OrderedDict()
for pulse_path, noise_path in pulse_noise_pairs:
channel = moss.Channel.from_ljh(pulse_path, noise_path)
Expand All @@ -117,22 +125,23 @@ def from_off_paths(cls, off_paths, description):
return cls(channels, description)

@classmethod
def from_ljh_folder(cls, pulse_folder, noise_folder=None, limit=None):
def from_ljh_folder(cls, pulse_folder: str, noise_folder: Optional[str]=None, limit: None=None) -> "Channels":
import os
assert os.path.isdir(pulse_folder),f"{pulse_folder=} {noise_folder=}"
if noise_folder is None:
paths = moss.ljhutil.find_ljh_files(pulse_folder)
pairs = ((path, None) for path in paths)
pairs: List[Tuple[str, str]] = [(path,'') for path in paths]
else:
assert os.path.isdir(noise_folder), f"{pulse_folder=} {noise_folder=}"
pairs = moss.ljhutil.match_files_by_channel(pulse_folder, noise_folder, limit=limit)
description = f"from_ljh_folder {pulse_folder=} {noise_folder=}"
return cls.from_ljh_path_pairs(pairs, description)

def get_experiment_state_df(self, experiment_state_path=None):
def get_experiment_state_df(self, experiment_state_path: Optional[str]=None) -> DataFrame:
if experiment_state_path is None:
first_ch = next(iter(self.channels.values()))
ljh_path = first_ch.header.df["Filename"][0]
df = first_ch.header.df.collect() if isinstance(first_ch.header.df, pl.LazyFrame) else first_ch.header.df
ljh_path = df.select("Filename").to_series().item()
experiment_state_path = moss.ljhutil.experiment_state_path_from_ljh_path(ljh_path)
df = pl.read_csv(experiment_state_path, new_columns=["unixnano", "state_label"])
# _col0, _col1 = df.columns
Expand All @@ -142,14 +151,14 @@ def get_experiment_state_df(self, experiment_state_path=None):
df_es = df_es.with_columns(state_label = pl.Series(values=sl_series, dtype=pl.Categorical))
return df_es

def with_experiment_state_by_path(self, experiment_state_path=None):
def with_experiment_state_by_path(self, experiment_state_path: Optional[str]=None) -> "Channels":
df_es = self.get_experiment_state_df(experiment_state_path)
return self.with_experiment_state(df_es)

def with_experiment_state(self, df_es):
def with_experiment_state(self, df_es: DataFrame) -> "Channels":
# this is not as performant as making use_exprs for states
# and using .set_sorted on the timestamp column
ch2s = {}
ch2s: collections.OrderedDict[int, moss.Channel] = collections.OrderedDict()
for ch_num, ch in self.channels.items():
ch2s[ch_num] = ch.with_experiment_state_df(df_es)
return Channels(ch2s, self.description)
Expand All @@ -162,11 +171,11 @@ def with_steps_dict(self, steps_dict):
ch2s[ch_num] = ch2
return Channels(ch2s, self.description+"\nfollowed some steps!!")

def concat_data(self, other_data):
def concat_data(self, other_data: "Channels") -> "Channels":
# sort explicitly: a set has no guaranteed iteration order (only dicts preserve
# insertion order), so this sort is what makes the channel order deterministic
ch_nums = sorted(list(set(self.channels.keys()).union(other_data.channels.keys())))
channels2 = {}
channels2:collections.OrderedDict[int, moss.Channel] = collections.OrderedDict()
for ch_num in ch_nums:
ch = self.channels[ch_num]
other_ch = other_data.channels[ch_num]
Expand Down
13 changes: 9 additions & 4 deletions moss/drift_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
import polars as pl
import typing
import pylab as plt
from matplotlib.axes._axes import Axes
from moss.channel import Channel
from numpy import ndarray
from polars.dataframe.frame import DataFrame
from polars.expr.expr import Expr

def drift_correct_mass(indicator, uncorrected):
def drift_correct_mass(indicator: ndarray, uncorrected: ndarray) -> "DriftCorrection":
slope, dc_info = \
mass.core.analysis_algorithms.drift_correct(indicator, uncorrected)
offset = dc_info["median_pretrig_mean"]
Expand All @@ -27,15 +32,15 @@ def drift_correct_wip(indicator, uncorrected):
class DriftCorrectStep(CalStep):
dc: typing.Any

def calc_from_df(self, df):
def calc_from_df(self, df: DataFrame) -> DataFrame:
indicator_col, uncorrected_col = self.inputs
slope, offset = self.dc.slope, self.dc.offset
df2 = df.select(
(pl.col(uncorrected_col) * (1 + slope * (pl.col(indicator_col) - offset))).alias(self.output[0])
).with_columns(df)
return df2

def dbg_plot(self, df):
def dbg_plot(self, df: DataFrame) -> Axes:
indicator_col, uncorrected_col = self.inputs
# breakpoint()
df_small = (
Expand All @@ -56,7 +61,7 @@ def dbg_plot(self, df):
return plt.gca()

@classmethod
def learn(cls, ch, indicator_col, uncorrected_col, corrected_col, use_expr):
def learn(cls, ch: Channel, indicator_col: str, uncorrected_col: str, corrected_col: str, use_expr: Expr) -> "DriftCorrectStep":
if corrected_col is None:
corrected_col = uncorrected_col + "_dc"
indicator_s, uncorrected_s = ch.good_serieses([indicator_col, uncorrected_col], use_expr)
Expand Down
27 changes: 16 additions & 11 deletions moss/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,24 @@
from dataclasses import dataclass
import moss
import polars as pl
from numpy import float64, ndarray
from polars.dataframe.frame import DataFrame
from typing import Optional, Tuple, Union
import matplotlib.pyplot as plt

def fourier_filter(avg_signal: np.ndarray, noise_psd: np.ndarray, dt: float,
                   fmax: Optional[float]=None, f_3db: Optional[float]=None,
                   peak_signal: float=1.0) -> "Filter":
    """Build a ``Filter`` with ``filter_type="fourier"``.

    All arguments are forwarded unchanged to ``calc_fourier_filter``; the
    filter values and variance it returns are wrapped together with ``dt``.

    Args:
        avg_signal: average signal passed to ``calc_fourier_filter``.
        noise_psd: noise power spectral density passed to ``calc_fourier_filter``.
        dt: sample spacing, also stored on the returned ``Filter``.
        fmax: optional frequency cutoff, or None to skip it.
        f_3db: optional roll-off frequency, or None to skip it.
        peak_signal: forwarded to ``calc_fourier_filter``.

    Returns:
        A ``Filter`` holding the computed values, variance and ``dt``.
    """
    # named `values` rather than `filter` to avoid shadowing the builtin
    values, variance = calc_fourier_filter(avg_signal, noise_psd, dt, fmax, f_3db, peak_signal)
    return Filter(values, variance, dt, filter_type="fourier")

@dataclass(frozen=True)
class Filter:
filter: np.ndarray
variance: float
variance: float64
dt: float
filter_type: str

def plot(self, axis=None, **plotkwarg):
def plot(self, axis: Optional[plt.axes]=None, **plotkwarg):
if axis is None:
plt.figure()
axis = plt.gca()
Expand All @@ -26,14 +31,14 @@ def plot(self, axis=None, **plotkwarg):
axis.set_xlabel("Lag Time (s)")
axis.figure.tight_layout()

def frequencies(self) -> np.ndarray:
    """Return one frequency per filter element, evenly spaced from 0 to 0.5 / dt.

    Returns:
        A float array with the same length as ``self.filter``. A filter with
        fewer than two elements yields zeros, because the spacing
        ``0.5 / ((n - 1) * dt)`` is undefined there.
    """
    n = len(self.filter)
    if n < 2:
        # (n - 1) * dt would be zero: avoid the 0/0 that produced NaN
        return np.zeros(n, dtype=float)
    return np.arange(0, n, dtype=float) * 0.5 / ((n - 1) * self.dt)

def __call__(self, pulse):
return np.dot(self.filter, pulse)

def apply_fmax(signal_freq_domain, fmax, dt):
def apply_fmax(signal_freq_domain: ndarray, fmax: None, dt: float) -> ndarray:
if fmax is None:
return signal_freq_domain
n = len(signal_freq_domain)
Expand All @@ -42,18 +47,18 @@ def apply_fmax(signal_freq_domain, fmax, dt):
out[freq>fmax]=0
return out

def apply_f_3db(signal_freq_domain: np.ndarray, f_3db: Optional[float], dt: float) -> np.ndarray:
    """Divide each bin of a spectrum by ``1 + (f / f_3db) ** 2``.

    The bin frequencies ``f`` are evenly spaced from 0 to ``0.5 / dt`` across
    the length of ``signal_freq_domain``.

    Args:
        signal_freq_domain: the spectrum to attenuate.
        f_3db: roll-off frequency; None returns the input object unchanged.
        dt: sample spacing used to derive the bin frequencies.

    Returns:
        The attenuated spectrum, or ``signal_freq_domain`` itself when
        ``f_3db`` is None.
    """
    if f_3db is None:
        return signal_freq_domain
    n = len(signal_freq_domain)
    if n < 2:
        # a single bin is at frequency 0; (n - 1) * dt would divide by zero
        freq = np.zeros(n, dtype=float)
    else:
        freq = np.arange(0, n, dtype=float) * 0.5 / ((n - 1) * dt)
    return signal_freq_domain / (1 + (freq * 1.0 / f_3db) ** 2)

def normalize_filter(filter: np.ndarray) -> np.ndarray:
    """Return a zero-mean, unit-norm copy of ``filter``.

    The mean is subtracted, then the result is divided by its Euclidean norm
    ``sqrt(dot(f, f))``.

    Args:
        filter: array of filter weights (assumed 1-D). It is not modified.

    Returns:
        A new array with zero mean and unit norm. A constant input has zero
        norm after mean removal, so the division yields non-finite values.
    """
    # Subtract out of place: the in-place `filter -= mean` rewrote the caller's
    # array and raised a casting error for integer-typed input.
    centered = filter - np.mean(filter)
    return centered / np.sqrt(np.dot(centered, centered))

def calc_fourier_filter(avg_signal, noise_psd, dt, fmax=None, f_3db=None, peak_signal=1.0):
def calc_fourier_filter(avg_signal: ndarray, noise_psd: ndarray, dt: float, fmax: None=None, f_3db: Optional[float]=None, peak_signal: float=1.0) -> Tuple[ndarray, float64]:
"""Compute the Fourier-domain filter and variances for signal processing.

Args:
Expand Down Expand Up @@ -86,7 +91,7 @@ def calc_fourier_filter(avg_signal, noise_psd, dt, fmax=None, f_3db=None, peak_s
variance = 1 / kappa
return normalize_filter(filter), variance

def filter_data_5lag(filter_values, pulses):
def filter_data_5lag(filter_values: ndarray, pulses: ndarray) -> Tuple[ndarray, ndarray]:
# These parameters fit a parabola to any 5 evenly-spaced points
fit_array = (
np.array(
Expand All @@ -113,7 +118,7 @@ class Filter5LagStep(moss.CalStep):
filter: Filter
spectrum: moss.NoisePSD

def calc_from_df(self, df):
def calc_from_df(self, df: DataFrame) -> DataFrame:
dfs = []
for df_iter in df.iter_slices(10000):
peak_x, peak_y = moss.filters.filter_data_5lag(
Expand All @@ -124,5 +129,5 @@ def calc_from_df(self, df):
df2 = df2.rename({"peak_x": self.output[0], "peak_y": self.output[1]})
return df2

def dbg_plot(self, df):
def dbg_plot(self, df: DataFrame) -> None:
return self.filter.plot()
Loading
Loading