ggggggggg · jsh162 · Aug 1, 2024 · Aug 1, 2024 · Aug 1, 2024 · Aug 1, 2024
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -34,13 +34,16 @@ jobs:
     #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
     #     # exit-zero treats all errors as warnings. ThSe GitHub editor is 127 chars wide
     #     flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    #- name: Test with mypy
-     # run: |
-      #  mypy .
-     #   # mypy path/to/specific_file.py
-
     - name: Test with pytest
       run: |
-        pytest
+        pytest 
     # call pytest twice since there is a bug that makes the examples fail if you call it only once https://github.com/marimo-team/marimo/issues/1888#issuecomment-2253432781
+
+    - name: Test with mypy
+      run: |
+        cd moss
+        mypy --follow-imports=silent misc.py ljhfiles.py noise_channel.py noise_algorithms.py multifit.py ljhutil.py filters.py drift_correction.py channels.py channel.py cal_steps.py
+
+
+
 
diff --git a/moss/__init__.py b/moss/__init__.py
@@ -5,7 +5,7 @@
 from .misc import good_series
 from .noise_algorithms import noise_psd, autocorrelation, NoisePSD
 from .noise_channel import NoiseChannel
-from .cal_steps import (CalSteps, CalStep, SummarizeStep)
+from .cal_steps import (CalSteps, CalStep, SummarizeStep)  # keep before filters/channel: they read moss.CalStep / moss.CalSteps at import time
 from .multifit import FitSpec, MultiFit, MultiFitQuadraticGainCalStep, MultiFitMassCalibrationStep
 from . import filters
 from .filters import fourier_filter, Filter, Filter5LagStep
@@ -14,4 +14,4 @@
 from .channel import Channel, ChannelHeader
 from .channels import Channels
 from .rough_cal import RoughCalibrationStep
-from . import phase_correct
+from . import phase_correct
diff --git a/moss/cal_steps.py b/moss/cal_steps.py
@@ -6,13 +6,14 @@
 import typing
 import numpy as np
 import moss
+from typing import Callable, List, Optional, Tuple,Union
 
 
 @dataclass(frozen=True)
 class CalStep:
     inputs: list[str]
     output: list[str]
-    good_expr: pl.Expr
+    good_expr: Union[pl.Expr,bool]
     use_expr: pl.Expr
 
 

diff --git a/moss/channel.py b/moss/channel.py
@@ -7,6 +7,7 @@
 from moss import NoiseChannel, CalSteps, DriftCorrectStep, SummarizeStep, Filter5LagStep
 from typing import Optional
 import numpy as np
+from typing import Union
 import time
 import mass
 
@@ -133,13 +134,13 @@ def rough_cal_combinatoric(
                                              use_expr=use_expr)
         return self.with_step(step)
 
-    def rough_cal(self, line_names: list[str | float],
+    def rough_cal(self, line_names: list[str | np.float64],
     uncalibrated_col: str="filtValue",
     calibrated_col: Optional[str]=None,
-    use_expr: bool | pl.Expr =True,
+    use_expr: Union[bool,pl.Expr] =True,
     max_fractional_energy_error_3rd_assignment: float=0.1,
     min_gain_fraction_at_ph_30k: float=0.25,
-    fwhm_pulse_height_units: float=75,
+    fwhm_pulse_height_units: int=75,
     n_extra_peaks: int=10,
     acceptable_rms_residual_e: float=10):
         step = moss.RoughCalibrationStep.learn_3peak(self, line_names, uncalibrated_col, calibrated_col,
@@ -440,7 +441,7 @@ def multifit_quadratic_gain_cal(
 
     def multifit_mass_cal(self, multifit: moss.MultiFit, 
                             previous_cal_step_index, calibrated_col, use_expr=True):
-        step = moss.MultiFitMassCalibrationStep.learn(self, multifit_spec=multifit,
+        step = moss.MultiFitMassCalibrationStep.learn(self, multifit=multifit,
                                             previous_cal_step_index=previous_cal_step_index,
                                              calibrated_col=calibrated_col,
                                              use_expr=use_expr)

diff --git a/moss/channels.py b/moss/channels.py
@@ -7,6 +7,10 @@
 import mass
 import moss
 import joblib
+from mass.calibration.line_models import LineModelResult
+from pathlib import WindowsPath
+from polars.dataframe.frame import DataFrame
+from typing import Callable, List, Optional, Tuple
 
 @dataclass(frozen=True)
 class Channels:
@@ -20,33 +24,37 @@ def ch0(self):
             return v
 
     @functools.cache
-    def dfg(self, exclude="pulse"):
+    def dfg(self, exclude: str="pulse") -> DataFrame:
         # return a dataframe containing good pulses from each channel,
         # exluding "pulse" by default
         # and including columns "key" (to be removed?) and "ch_num"
         # the more common call should be to wrap this in a convenient plotter
-        dfs = []
+        dfs: List[pl.DataFrame] = []
         for ch_num, channel in self.channels.items():
-            df = channel.df.select(pl.exclude(exclude)).filter(channel.good_expr)
+            df = channel.df.collect() if isinstance(channel.df, pl.LazyFrame) else channel.df
+            # same filtering as before this change: drop the excluded column(s), keep only good pulses
+            df = df.select(pl.exclude(exclude)).filter(channel.good_expr)
             # key_series = pl.Series("key", dtype=pl.Int64).extend_constant(key, len(df))
             assert ch_num == channel.header.ch_num
             ch_series = pl.Series("ch_num", dtype=pl.Int64).extend_constant(
-                channel.header.ch_num, len(df)
+                channel.header.ch_num, df.height
             )
             dfs.append(df.with_columns(ch_series))
-        return pl.concat(dfs)
+        # stack the per-channel frames; every row carries its channel number in "ch_num"
+        combined_df = pl.concat(dfs)
+        return combined_df
 
     def linefit(
         self,
-        line,
-        col,
-        use_expr=True,
-        has_linear_background=False,
-        has_tails=False,
-        dlo=50,
-        dhi=50,
-        binsize=0.5,
-    ):
+        line: str,
+        col: str,
+        use_expr: bool=True,
+        has_linear_background: bool=False,
+        has_tails: bool=False,
+        dlo: int=50,
+        dhi: int=50,
+        binsize: float=0.5,
+    ) -> LineModelResult:
         model = mass.get_model(line, has_linear_background=False, has_tails=False)
         pe = model.spect.peak_energy
         _bin_edges = np.arange(pe - dlo, pe + dhi, binsize)
@@ -67,7 +75,7 @@ def linefit(
         )
         return result
 
-    def map(self, f, allow_throw=True):
+    def map(self, f: Callable, allow_throw: bool=True) -> "Channels":
         new_channels = collections.OrderedDict()
         for key, channel in self.channels.items():
             try:
@@ -91,7 +99,7 @@ def work(key):
         results = parallel(joblib.delayed(work)(key) for key in self.channels.keys())
         return results
 
-    def __hash__(self):
+    def __hash__(self) -> int:
         # needed to make functools.cache work
         # if self or self.anything is mutated, assumptions will be broken
         # and we may get nonsense results
@@ -101,7 +109,7 @@ def __eq__(self, other):
         return id(self) == id(other)
 
     @classmethod
-    def from_ljh_path_pairs(cls, pulse_noise_pairs, description):
+    def from_ljh_path_pairs(cls, pulse_noise_pairs: List[Tuple[str, Optional[str]]], description: str) -> "Channels":
         _channels = collections.OrderedDict()
         for pulse_path, noise_path in pulse_noise_pairs:
             channel = moss.Channel.from_ljh(pulse_path, noise_path)
@@ -117,22 +125,23 @@ def from_off_paths(cls, off_paths, description):
         return cls(channels, description)
 
     @classmethod
-    def from_ljh_folder(cls, pulse_folder, noise_folder=None, limit=None):
+    def from_ljh_folder(cls, pulse_folder: str, noise_folder: Optional[str]=None, limit: Optional[int]=None) -> "Channels":
         import os
         assert os.path.isdir(pulse_folder),f"{pulse_folder=} {noise_folder=}"
         if noise_folder is None:
             paths = moss.ljhutil.find_ljh_files(pulse_folder)
-            pairs = ((path, None) for path in paths)
+            pairs: List[Tuple[str, Optional[str]]] = [(path, None) for path in paths]  # None (not '') still means "no noise file", as before this change
         else:
             assert os.path.isdir(noise_folder), f"{pulse_folder=} {noise_folder=}"
             pairs = moss.ljhutil.match_files_by_channel(pulse_folder, noise_folder, limit=limit)
         description = f"from_ljh_folder {pulse_folder=} {noise_folder=}"
         return cls.from_ljh_path_pairs(pairs, description)
 
-    def get_experiment_state_df(self, experiment_state_path=None):
+    def get_experiment_state_df(self, experiment_state_path: Optional[str]=None) -> DataFrame:
         if experiment_state_path is None:
             first_ch = next(iter(self.channels.values()))
-            ljh_path = first_ch.header.df["Filename"][0]
+            df = first_ch.header.df.collect() if isinstance(first_ch.header.df, pl.LazyFrame) else first_ch.header.df
+            ljh_path = df.select("Filename").to_series().item()
             experiment_state_path = moss.ljhutil.experiment_state_path_from_ljh_path(ljh_path)
         df = pl.read_csv(experiment_state_path, new_columns=["unixnano", "state_label"])
         # _col0, _col1 = df.columns
@@ -142,14 +151,14 @@ def get_experiment_state_df(self, experiment_state_path=None):
         df_es = df_es.with_columns(state_label = pl.Series(values=sl_series, dtype=pl.Categorical))
         return df_es
 
-    def with_experiment_state_by_path(self, experiment_state_path=None):
+    def with_experiment_state_by_path(self, experiment_state_path: Optional[str]=None) -> "Channels":
         df_es = self.get_experiment_state_df(experiment_state_path)
         return self.with_experiment_state(df_es)
 
-    def with_experiment_state(self, df_es):
+    def with_experiment_state(self, df_es: DataFrame) -> "Channels":
         # this is not as performant as making use_exprs for states
         # and using .set_sorted on the timestamp column
-        ch2s = {}
+        ch2s: collections.OrderedDict[int, moss.Channel] = collections.OrderedDict()
         for ch_num, ch in self.channels.items():
             ch2s[ch_num] = ch.with_experiment_state_df(df_es)
         return Channels(ch2s, self.description)    
@@ -162,11 +171,11 @@ def with_steps_dict(self, steps_dict):
             ch2s[ch_num] = ch2
         return Channels(ch2s, self.description+"\nfollowed some steps!!")
 
-    def concat_data(self, other_data):
+    def concat_data(self, other_data: "Channels") -> "Channels":
         # sorting here to show intention, but I think set is sorted by insertion order as
         # an implementation detail so this may not do anything
         ch_nums = sorted(list(set(self.channels.keys()).union(other_data.channels.keys())))
-        channels2 = {}
+        channels2:collections.OrderedDict[int, moss.Channel] = collections.OrderedDict()
         for ch_num in ch_nums:
             ch = self.channels[ch_num]
             other_ch = other_data.channels[ch_num]

diff --git a/moss/drift_correction.py b/moss/drift_correction.py
@@ -9,8 +9,14 @@
 import polars as pl
 import typing
 import pylab as plt
+from matplotlib.axes._axes import Axes
+from numpy import ndarray
+from polars.dataframe.frame import DataFrame
+from polars.expr.expr import Expr
+if typing.TYPE_CHECKING:  # annotation-only: moss.channel imports DriftCorrectStep from moss, so a runtime import here is circular
+    from moss.channel import Channel
 
-def drift_correct_mass(indicator, uncorrected):
+def drift_correct_mass(indicator: ndarray, uncorrected: ndarray) -> "DriftCorrection":
     slope, dc_info = \
             mass.core.analysis_algorithms.drift_correct(indicator, uncorrected)
     offset = dc_info["median_pretrig_mean"]
@@ -27,15 +33,15 @@ def drift_correct_wip(indicator, uncorrected):
 class DriftCorrectStep(CalStep):
     dc: typing.Any
 
-    def calc_from_df(self, df):
+    def calc_from_df(self, df: DataFrame) -> DataFrame:
         indicator_col, uncorrected_col = self.inputs
         slope, offset = self.dc.slope, self.dc.offset
         df2 = df.select(
             (pl.col(uncorrected_col) * (1 + slope * (pl.col(indicator_col) - offset))).alias(self.output[0])
         ).with_columns(df)
         return df2
 
-    def dbg_plot(self, df):
+    def dbg_plot(self, df: DataFrame) -> Axes:
         indicator_col, uncorrected_col = self.inputs
         # breakpoint()
         df_small = (
@@ -56,7 +62,7 @@ def dbg_plot(self, df):
         return plt.gca()
 
     @classmethod
-    def learn(cls, ch, indicator_col, uncorrected_col, corrected_col, use_expr):
+    def learn(cls, ch: "Channel", indicator_col: str, uncorrected_col: str, corrected_col: str, use_expr: Expr) -> "DriftCorrectStep":
         if corrected_col is None:
             corrected_col = uncorrected_col + "_dc"
         indicator_s, uncorrected_s = ch.good_serieses([indicator_col, uncorrected_col], use_expr)

diff --git a/moss/filters.py b/moss/filters.py
@@ -3,19 +3,24 @@
 from dataclasses import dataclass
 import moss
 import polars as pl
+from numpy import float64, ndarray
+from polars.dataframe.frame import DataFrame
+from typing import Optional, Tuple, Union
+import matplotlib.pyplot as plt
 
-def fourier_filter(avg_signal, noise_psd, dt, fmax=None, f_3db=None, peak_signal=1.0):
+
+def fourier_filter(avg_signal: ndarray, noise_psd: ndarray, dt: float, fmax: Optional[float]=None, f_3db: Optional[float]=None, peak_signal: float=1.0) -> "Filter":
     filter, variance = calc_fourier_filter(avg_signal, noise_psd, dt, fmax, f_3db, peak_signal)
     return Filter(filter, variance, dt, filter_type="fourier")    
 
 @dataclass(frozen=True)
 class Filter:
     filter: np.ndarray
-    variance: float
+    variance: float64
     dt: float
     filter_type: str
 
-    def plot(self, axis=None, **plotkwarg):
+    def plot(self, axis: Optional[plt.Axes]=None, **plotkwarg):
         if axis is None:
             plt.figure()
             axis = plt.gca()
@@ -26,14 +31,14 @@ def plot(self, axis=None, **plotkwarg):
         axis.set_xlabel("Lag Time (s)")
         axis.figure.tight_layout()
 
-    def frequencies(self):
+    def frequencies(self) -> ndarray:
         n = len(self.filter)
         return np.arange(0, n, dtype=float) * 0.5 / ((n - 1) * self.dt)
 
     def __call__(self, pulse):
         return np.dot(self.filter, pulse)
 
-def apply_fmax(signal_freq_domain, fmax, dt):
+def apply_fmax(signal_freq_domain: ndarray, fmax: Optional[float], dt: float) -> ndarray:
     if fmax is None:
         return signal_freq_domain
     n = len(signal_freq_domain)
@@ -42,18 +47,18 @@ def apply_fmax(signal_freq_domain, fmax, dt):
     out[freq>fmax]=0
     return out
 
-def apply_f_3db(signal_freq_domain, f_3db, dt):
+def apply_f_3db(signal_freq_domain: ndarray, f_3db: Union[float,None], dt: float) -> ndarray:
     if f_3db is None:
         return signal_freq_domain
     n = len(signal_freq_domain)
     freq = np.arange(0, n, dtype=float) * 0.5 / ((n - 1) * dt)
     return signal_freq_domain / (1 + (freq * 1.0 / f_3db)**2)
 
-def normalize_filter(filter):
+def normalize_filter(filter: ndarray) -> ndarray:
     filter -= np.mean(filter)
     return filter/np.sqrt(np.dot(filter, filter))
 
-def calc_fourier_filter(avg_signal, noise_psd, dt, fmax=None, f_3db=None, peak_signal=1.0):
+def calc_fourier_filter(avg_signal: ndarray, noise_psd: ndarray, dt: float, fmax: Optional[float]=None, f_3db: Optional[float]=None, peak_signal: float=1.0) -> Tuple[ndarray, float64]:
     """Compute the Fourier-domain filter and variances for signal processing.
 
     Args:
@@ -86,7 +91,7 @@ def calc_fourier_filter(avg_signal, noise_psd, dt, fmax=None, f_3db=None, peak_s
     variance = 1 / kappa
     return normalize_filter(filter), variance
 
-def filter_data_5lag(filter_values, pulses):
+def filter_data_5lag(filter_values: ndarray, pulses: ndarray) -> Tuple[ndarray, ndarray]:
     # These parameters fit a parabola to any 5 evenly-spaced points
     fit_array = (
         np.array(
@@ -113,7 +118,7 @@ class Filter5LagStep(moss.CalStep):
     filter: Filter
     spectrum: moss.NoisePSD
 
-    def calc_from_df(self, df):
+    def calc_from_df(self, df: DataFrame) -> DataFrame:
         dfs = []
         for df_iter in df.iter_slices(10000):
             peak_x, peak_y = moss.filters.filter_data_5lag(
@@ -124,5 +129,5 @@ def calc_from_df(self, df):
         df2 = df2.rename({"peak_x": self.output[0], "peak_y": self.output[1]})
         return df2
 
-    def dbg_plot(self, df):
+    def dbg_plot(self, df: DataFrame) -> None:
         return self.filter.plot()