From cee676dd97f660881de5ff109eb3016b5c2c49e8 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 31 May 2024 19:58:19 +0200 Subject: [PATCH] ruff all and fix lints --- .pre-commit-config.yaml | 2 +- ctd/__init__.py | 5 +- ctd/extras.py | 151 ++++++++------- ctd/plotting.py | 25 ++- ctd/processing.py | 127 +++++++------ ctd/read.py | 282 ++++++++++++++++------------- docs/source/conf.py | 6 +- notebooks/00-reading-data.ipynb | 3 - notebooks/quick_intro.ipynb | 37 ++-- pyproject.toml | 4 +- requirements-dev.txt | 18 +- requirements.txt | 2 +- ruff.toml | 53 ++++++ tests/test_plotting.py | 27 ++- tests/test_processing.py | 45 +++-- tests/test_processing_real_data.py | 28 ++- tests/test_read.py | 107 ++++++----- 17 files changed, 522 insertions(+), 400 deletions(-) create mode 100644 ruff.toml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c3e6905..a967a2b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -39,7 +39,7 @@ repos: - id: add-trailing-comma - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.4.7 + rev: v0.5.0 hooks: - id: ruff args: ["--fix", "--show-fixes"] diff --git a/ctd/__init__.py b/ctd/__init__.py index e4adab3..8da35d1 100644 --- a/ctd/__init__.py +++ b/ctd/__init__.py @@ -1,6 +1,5 @@ -""" -Tools to load hydrographic data as pandas DataFrame with some handy methods for -data pre-processing and analysis. +"""Tools to load hydrographic data as pandas DataFrame with some handy methods +for data pre-processing and analysis. """ from .plotting import plot_cast diff --git a/ctd/extras.py b/ctd/extras.py index 62f920a..34c79e7 100644 --- a/ctd/extras.py +++ b/ctd/extras.py @@ -1,16 +1,13 @@ -""" -Extra functionality for plotting and post-processing. -""" +"""Extra functionality for plotting and post-processing.""" import matplotlib.pyplot as plt import numpy as np -import numpy.ma as ma -from pandas import Series +import pandas as pd +from numpy import ma def _extrap1d(interpolator): - """ - How to make scipy.interpolate return an extrapolated result beyond the + """How to make scipy.interpolate return an extrapolated result beyond the input range. This is usually bad interpolation! But sometimes useful for pretty pictures, @@ -25,10 +22,9 @@ def pointwise(x): """Pointwise interpolation.""" if x < xs[0]: return ys[0] + (x - xs[0]) * (ys[1] - ys[0]) / (xs[1] - xs[0]) - elif x > xs[-1]: + if x > xs[-1]: return ys[-1] + (x - xs[-1]) * (ys[-1] - ys[-2]) / (xs[-1] - xs[-2]) - else: - return interpolator(x) + return interpolator(x) def ufunclike(xs): """Return an interpolation ufunc.""" @@ -39,30 +35,34 @@ def ufunclike(xs): def get_maxdepth(self): """Return the maximum depth/pressure of a cast.""" - valid_last_depth = self.apply(Series.notnull).values.T - return np.float_(self.index.values * valid_last_depth).max(axis=1) - - -def extrap_sec(data, dist, depth, w1=1.0, w2=0): - """ - Extrapolates `data` to zones where the shallow stations are shadowed by + valid_last_depth = self.apply(pd.Series.notnull).to_numpy().T + return np.float64(self.index.to_numpy() * valid_last_depth).max(axis=1) + + +def extrap_sec( + data: np.ndarray, + dist: np.ndarray, + depth: np.ndarray, + w1: float = 1.0, + w2: float = 0, +) -> np.ndarray: + """Extrapolate `data` to zones where the shallow stations are shadowed by the deep stations. The shadow region usually cannot be extrapolates via linear interpolation. The extrapolation is applied using the gradients of the `data` at a certain level. 
- Parameters - ---------- - data : array_like - Data to be extrapolated - dist : array_like - Stations distance - fd : float - Decay factor [0-1] + Inputs + ------ + data : Data to be extrapolated + dist : Stations distance + depth : Depth of the profile + w1 : weights [0-1] + w2 : weights [0-1] - Returns + Outputs ------- Sec_extrap : array_like Extrapolated variable @@ -72,39 +72,45 @@ def extrap_sec(data, dist, depth, w1=1.0, w2=0): new_data1 = [] for row in data: + new_row = row.copy() mask = ~np.isnan(row) if mask.any(): y = row[mask] if y.size == 1: - row = np.repeat(y, len(mask)) + new_row = np.repeat(y, len(mask)) else: x = dist[mask] f_i = interp1d(x, y) f_x = _extrap1d(f_i) - row = f_x(dist) - new_data1.append(row) + new_row = f_x(dist) + new_data1.append(new_row) new_data2 = [] for col in data.T: + new_col = col.copy() mask = ~np.isnan(col) if mask.any(): y = col[mask] if y.size == 1: - col = np.repeat(y, len(mask)) + new_col = np.repeat(y, len(mask)) else: z = depth[mask] f_i = interp1d(z, y) f_z = _extrap1d(f_i) - col = f_z(depth) - new_data2.append(col) + new_col = f_z(depth) + new_data2.append(new_col) - new_data = np.array(new_data1) * w1 + np.array(new_data2).T * w2 - return new_data + return np.array(new_data1) * w1 + np.array(new_data2).T * w2 -def gen_topomask(h, lon, lat, dx=1.0, kind="linear", plot=False): - """ - Generates a topography mask from an oceanographic transect taking the +def gen_topomask( + h: np.ndarray, + lon: np.ndarray, + lat: np.ndarray, + dx: float = 1.0, + kind: str = "linear", +) -> tuple: + """Generate a topography mask from an oceanographic transect taking the deepest CTD scan as the depth of each station. Inputs @@ -119,8 +125,6 @@ def gen_topomask(h, lon, lat, dx=1.0, kind="linear", plot=False): kind : string, optional Type of the interpolation to be performed. See scipy.interpolate.interp1d documentation for details. - plot : bool - Whether to plot mask for visualization. Outputs ------- @@ -134,7 +138,6 @@ def gen_topomask(h, lon, lat, dx=1.0, kind="linear", plot=False): André Palóczy Filho (paloczy@gmail.com) -- October/2012 """ - import gsw from scipy.interpolate import interp1d @@ -142,18 +145,26 @@ def gen_topomask(h, lon, lat, dx=1.0, kind="linear", plot=False): # Distance in km. x = np.append(0, np.cumsum(gsw.distance(lon, lat)[0] / 1e3)) h = -gsw.z_from_p(h, lat.mean()) - Ih = interp1d(x, h, kind=kind, bounds_error=False, fill_value=h[-1]) + ih = interp1d(x, h, kind=kind, bounds_error=False, fill_value=h[-1]) xm = np.arange(0, x.max() + dx, dx) - hm = Ih(xm) + hm = ih(xm) return xm, hm -def plot_section(self, reverse=False, filled=False, **kw): +def plot_section( # noqa: PLR0915 + self: pd.DataFrame, + *, + reverse: bool = False, + filled: bool = False, + **kw: dict, +) -> tuple: """Plot a sequence of CTD casts as a section.""" import gsw - lon, lat, data = list(map(np.asanyarray, (self.lon, self.lat, self.values))) + lon, lat, data = list( + map(np.asanyarray, (self.lon, self.lat, self.to_numpy())), + ) data = ma.masked_invalid(data) h = self.get_maxdepth() if reverse: @@ -163,7 +174,7 @@ def plot_section(self, reverse=False, filled=False, **kw): h = h[::-1] lon, lat = map(np.atleast_2d, (lon, lat)) x = np.append(0, np.cumsum(gsw.distance(lon, lat)[0] / 1e3)) - z = self.index.values.astype(float) + z = self.index.to_numpy().astype(float) if filled: # CAVEAT: this method cause discontinuities. 
data = data.filled(fill_value=np.nan) @@ -248,51 +259,53 @@ def plot_section(self, reverse=False, filled=False, **kw): return fig, ax, cb -def cell_thermal_mass(temperature, conductivity): - """ - Sample interval is measured in seconds. +def cell_thermal_mass( + temperature: pd.Series, + conductivity: pd.Series, +) -> pd.Series: + """Sample interval is measured in seconds. Temperature in degrees. CTM is calculated in S/m. """ - alpha = 0.03 # Thermal anomaly amplitude. beta = 1.0 / 7 # Thermal anomaly time constant (1/beta). sample_interval = 1 / 15.0 a = 2 * alpha / (sample_interval * beta + 2) b = 1 - (2 * a / alpha) - dCodT = 0.1 * (1 + 0.006 * [temperature - 20]) - dT = np.diff(temperature) - ctm = -1.0 * b * conductivity + a * (dCodT) * dT # [S/m] - return ctm + dc_o_dt = 0.1 * (1 + 0.006 * [temperature - 20]) + dt = np.diff(temperature) + return -1.0 * b * conductivity + a * (dc_o_dt) * dt # [S/m] -def mixed_layer_depth(CT, method="half degree"): +def mixed_layer_depth(ct: pd.Series, method: str = "half degree") -> pd.Series: """Return the mixed layer depth based on the "half degree" criteria.""" - if method == "half degree": - mask = CT[0] - CT < 0.5 - else: - mask = np.zeros_like(CT) - return Series(mask, index=CT.index, name="MLD") + half_degree = 0.5 + mask = ( + ct[0] - ct < half_degree + if method == "half degree" + else np.zeros_like(ct) + ) + return pd.Series(mask, index=ct.index, name="MLD") -def barrier_layer_thickness(SA, CT): - """ - Compute the thickness of water separating the mixed surface layer from the - thermocline. A more precise definition would be the difference between - mixed layer depth (MLD) calculated from temperature minus the mixed layer - depth calculated using density. +def barrier_layer_thickness(sa: pd.Series, ct: pd.Series) -> pd.Series: + """Compute the thickness of water separating the mixed surface layer from + the thermocline. + A more precise definition would be the difference between mixed layer depth + (MLD) calculated from temperature minus the mixed layer depth calculated + using density. """ import gsw - sigma_theta = gsw.sigma0(SA, CT) - mask = mixed_layer_depth(CT) + sigma_theta = gsw.sigma0(sa, ct) + mask = mixed_layer_depth(ct) mld = np.where(mask)[0][-1] sig_surface = sigma_theta[0] - sig_bottom_mld = gsw.sigma0(SA[0], CT[mld]) + sig_bottom_mld = gsw.sigma0(sa[0], ct[mld]) d_sig_t = sig_surface - sig_bottom_mld d_sig = sigma_theta - sig_bottom_mld mask = d_sig < d_sig_t # Barrier layer. - return Series(mask, index=SA.index, name="BLT") + return pd.Series(mask, index=sa.index, name="BLT") diff --git a/ctd/plotting.py b/ctd/plotting.py index 260f80f..0558e1e 100644 --- a/ctd/plotting.py +++ b/ctd/plotting.py @@ -1,20 +1,25 @@ -""" -Plotting module -""" +"""Plotting module.""" + +from __future__ import annotations import matplotlib.pyplot as plt import pandas as pd from pandas_flavor import register_dataframe_method, register_series_method +cast = pd.DataFrame | pd.Series + @register_series_method @register_dataframe_method -def plot_cast(df, secondary_y=False, label=None, ax=None, *args, **kwargs): - """ - Plot a CTD variable with the index in the y-axis instead of x-axis. 
- - """ - +def plot_cast( + df: cast, + *, + secondary_y: bool = False, + label: str | None = None, + ax: plt.Axes | None = None, + **kwargs: dict, +) -> cast: + """Plot a CTD variable with the index in the y-axis instead of x-axis.""" fignums = plt.get_fignums() if ax is None and not fignums: ax = plt.axes() @@ -44,7 +49,7 @@ def plot_cast(df, secondary_y=False, label=None, ax=None, *args, **kwargs): ax.plot(series, series.index, label=labels[k]) elif isinstance(df, pd.Series): label = label if label else str(df.name) - ax.plot(df.values, df.index, *args, label=label, **kwargs) + ax.plot(df.values, df.index, label=label, **kwargs) ax.set_ylabel(ylabel) ax.set_xlabel(xlabel) diff --git a/ctd/processing.py b/ctd/processing.py index 8aecc11..cdecfb9 100644 --- a/ctd/processing.py +++ b/ctd/processing.py @@ -1,37 +1,35 @@ -""" -Processing module -""" +"""Processing module.""" import numpy as np -import numpy.ma as ma import pandas as pd +from numpy import ma from pandas_flavor import register_dataframe_method, register_series_method +cast = pd.DataFrame | pd.Series -def _rolling_window(data, block): - """ - http://stackoverflow.com/questions/4936620/ + +def _rolling_window(data: np.ndarray, block: int) -> np.ndarray: + """http://stackoverflow.com/questions/4936620/ Using strides for an efficient moving average filter. """ shape = data.shape[:-1] + (data.shape[-1] - block + 1, block) - strides = data.strides + (data.strides[-1],) + strides = (*data.strides, data.strides[-1]) return np.lib.stride_tricks.as_strided(data, shape=shape, strides=strides) @register_series_method @register_dataframe_method -def remove_above_water(df): +def remove_above_water(df: cast) -> cast: """Remove all data above the water line.""" return remove_up_to(df, idx=0) @register_series_method @register_dataframe_method -def remove_up_to(df, idx): - """ - Remove all the data above a certain index value where index can be pressure or depth. - +def remove_up_to(df: cast, idx: int) -> cast: + """Remove all the data above a certain index value where index can be + pressure or depth. """ new_df = df.copy() return new_df[new_df.index >= idx] @@ -39,8 +37,8 @@ def remove_up_to(df, idx): @register_series_method @register_dataframe_method -def split(df): - """Returns a tuple with down/up-cast.""" +def split(df: cast) -> cast: + """Return a tuple with down/up-cast.""" idx = df.index.argmax() + 1 down = df.iloc[:idx] # Reverse index to orient it as a CTD cast. @@ -50,9 +48,12 @@ def split(df): @register_series_method @register_dataframe_method -def lp_filter(df, sample_rate=24.0, time_constant=0.15): - """ - Filter a series with `time_constant` (use 0.15 s for pressure), and for +def lp_filter( + df: cast, + sample_rate: float = 24.0, + time_constant: float = 0.15, +) -> cast: + """Filter a series with `time_constant` (use 0.15 s for pressure), and for a signal of `sample_rate` in Hertz (24 Hz for 911+). NOTE: 911+ systems do not require filter for temperature nor salinity. 
@@ -65,9 +66,9 @@ def lp_filter(df, sample_rate=24.0, time_constant=0.15): >>> raw = ctd.from_cnv(data_path.joinpath("CTD-spiked-unfiltered.cnv.bz2")) >>> prc = ctd.from_cnv(data_path.joinpath("CTD-spiked-filtered.cnv.bz2")) >>> kw = {"sample_rate": 24.0, "time_constant": 0.15} - >>> original = prc.index.values - >>> unfiltered = raw.index.values - >>> filtered = raw.lp_filter(**kw).index.values + >>> original = prc.index.to_numpy() + >>> unfiltered = raw.index.to_numpy() + >>> filtered = raw.lp_filter(**kw).index.to_numpy() >>> fig, ax = plt.subplots() >>> (l1,) = ax.plot(original, "k", label="original") >>> (l2,) = ax.plot(unfiltered, "r", label="unfiltered") @@ -79,26 +80,22 @@ def lp_filter(df, sample_rate=24.0, time_constant=0.15): https://scipy-cookbook.readthedocs.io/items/FIRFilter.html """ - from scipy import signal # Butter is closer to what SBE is doing with their cosine filter. - Wn = (1.0 / time_constant) / (sample_rate * 2.0) - b, a = signal.butter(2, Wn, "low") + wn = (1.0 / time_constant) / (sample_rate * 2.0) + b, a = signal.butter(2, wn, "low") new_df = df.copy() - new_df.index = signal.filtfilt(b, a, df.index.values) + new_df.index = signal.filtfilt(b, a, df.index.to_numpy()) return new_df @register_series_method @register_dataframe_method -def press_check(df): - """ - Remove pressure reversals from the index. - - """ +def press_check(df: cast) -> cast: + """Remove pressure reversals from the index.""" new_df = df.copy() - press = new_df.copy().index.values + press = new_df.copy().index.to_numpy() ref = press[0] inversions = np.diff(np.r_[press, press[-1]]) < 0 @@ -108,11 +105,11 @@ def press_check(df): ref = press[k] cut = press[k + 1 :] < ref mask[k + 1 :][cut] = True - new_df[mask] = np.NaN + new_df[mask] = np.nan return new_df -def _bindata(series, delta, method): +def _bindata(series: pd.Series, delta: int, method: str) -> pd.Series: """Average the data into bins of the size `delta`.""" start = np.ceil(series.index[0]) stop = np.floor(series.index[-1]) @@ -125,17 +122,17 @@ def _bindata(series, delta, method): data = np.interp(new_index, series.index, series) return pd.Series(data, index=new_index, name=series.name) else: + msg = f"Expected method `average` or `interpolate`, but got {method}." raise ValueError( - f"Expected method `average` or `interpolate`, but got {method}.", + msg, ) return new_series @register_series_method @register_dataframe_method -def bindata(df, delta=1.0, method="average"): - """ - Bin average the index (usually pressure) to a given interval (default +def bindata(df: cast, delta: float = 1.0, method: str = "average") -> cast: + """Bin average the index (usually pressure) to a given interval (default delta = 1). """ @@ -146,14 +143,12 @@ def bindata(df, delta=1.0, method="average"): return new_df -def _despike(series, n1, n2, block, keep): - """ - Wild Edit Seabird-like function. Passes with Standard deviation +def _despike(series: pd.Series, n1: int, n2: int, block: int) -> pd.Series: + """Wild Edit Seabird-like function. Passes with Standard deviation `n1` and `n2` with window size `block`. """ - - data = series.values.astype(float).copy() + data = series.to_numpy().astype(float).copy() roll = _rolling_window(data, block) roll = ma.masked_invalid(roll) std = n1 * roll.std(axis=1) @@ -161,8 +156,10 @@ def _despike(series, n1, n2, block, keep): # Use the last value to fill-up. 
std = np.r_[std, np.tile(std[-1], block - 1)] mean = np.r_[mean, np.tile(mean[-1], block - 1)] - mask = np.abs(data - mean.filled(fill_value=np.NaN)) > std.filled(fill_value=np.NaN) - data[mask] = np.NaN + mask = np.abs(data - mean.filled(fill_value=np.nan)) > std.filled( + fill_value=np.nan, + ) + data[mask] = np.nan # Pass two recompute the mean and std without the flagged values from pass # one and removed the flagged data. @@ -173,34 +170,32 @@ def _despike(series, n1, n2, block, keep): # Use the last value to fill-up. std = np.r_[std, np.tile(std[-1], block - 1)] mean = np.r_[mean, np.tile(mean[-1], block - 1)] - values = series.values.astype(float) - mask = np.abs(values - mean.filled(fill_value=np.NaN)) > std.filled( - fill_value=np.NaN, + values = series.to_numpy().astype(float) + mask = np.abs(values - mean.filled(fill_value=np.nan)) > std.filled( + fill_value=np.nan, ) clean = series.astype(float).copy() - clean[mask] = np.NaN + clean[mask] = np.nan return clean @register_series_method @register_dataframe_method -def despike(df, n1=2, n2=20, block=100, keep=0): - """ - Wild Edit Seabird-like function. Passes with Standard deviation +def despike(df: cast, n1: int = 2, n2: int = 20, block: int = 100) -> cast: + """Wild Edit Seabird-like function. Passes with Standard deviation `n1` and `n2` with window size `block`. """ if isinstance(df, pd.Series): - new_df = _despike(df, n1=n1, n2=n2, block=block, keep=keep) + new_df = _despike(df, n1=n1, n2=n2, block=block) else: - new_df = df.apply(_despike, n1=n1, n2=n2, block=block, keep=keep) + new_df = df.apply(_despike, n1=n1, n2=n2, block=block) return new_df -def _smooth(series, window_len, window): +def _smooth(series: pd.Series, window_len: int, window: str) -> pd.Series: """Smooth the data using a window with requested size.""" - windows = { "flat": np.ones, "hanning": np.hanning, @@ -208,15 +203,17 @@ def _smooth(series, window_len, window): "bartlett": np.bartlett, "blackman": np.blackman, } - data = series.values.copy() + data = series.to_numpy().copy() - if window_len < 3: + min_window_length = 3 + if window_len < min_window_length: return pd.Series(data, index=series.index, name=series.name) if window not in list(windows.keys()): + msg = """window must be one of 'flat', 'hanning', + 'hamming', 'bartlett', 'blackman'""" raise ValueError( - """window must be one of 'flat', 'hanning', - 'hamming', 'bartlett', 'blackman'""", + msg, ) s = np.r_[ @@ -234,7 +231,7 @@ def _smooth(series, window_len, window): @register_series_method @register_dataframe_method -def smooth(df, window_len=11, window="hanning"): +def smooth(df: cast, window_len: int = 11, window: str = "hanning") -> cast: """Smooth the data using a window with requested size.""" if isinstance(df, pd.Series): new_df = _smooth(df, window_len=window_len, window=window) @@ -243,23 +240,21 @@ def smooth(df, window_len=11, window="hanning"): return new_df -def _movingaverage(series, window_size=48): - """Moving average function on a pandas series.""" +def _movingaverage(series: pd.Series, window_size: int = 48) -> pd.Series: + """Perform Moving Average function on a pandas series.""" window = np.ones(int(window_size)) / float(window_size) return pd.Series(np.convolve(series, window, "same"), index=series.index) @register_series_method @register_dataframe_method -def movingaverage(df, window_size=48): - """ - Moving average on a data frame or series. +def movingaverage(df: cast, window_size: int = 48) -> cast: + """Perform Moving Average on a DataFrame or Series. 
Inputs: windows_size : integer """ - if isinstance(df, pd.Series): new_df = _movingaverage(df, window_size=window_size) else: diff --git a/ctd/read.py b/ctd/read.py index 24fbfbe..99dbea9 100644 --- a/ctd/read.py +++ b/ctd/read.py @@ -1,15 +1,15 @@ -""" -Read module -""" +"""Read module.""" + +from __future__ import annotations import bz2 import collections +import datetime import gzip import linecache import re import warnings import zipfile -from datetime import datetime from io import StringIO from pathlib import Path @@ -19,7 +19,7 @@ import pandas as pd -def _basename(fname): +def _basename(fname: str | Path) -> (str, str, str): """Return file name without path.""" if not isinstance(fname, Path): fname = Path(fname) @@ -27,14 +27,13 @@ def _basename(fname): return path, name, ext -def _normalize_names(name): +def _normalize_names(name: str) -> str: """Normalize column names.""" name = name.strip() - name = name.strip("*") - return name + return name.strip("*") -def _open_compressed(fname): +def _open_compressed(fname: Path) -> str: """Open compressed gzip, gz, zip or bz2 files.""" extension = fname.suffix.casefold() if extension in [".gzip", ".gz"]: @@ -50,15 +49,19 @@ def _open_compressed(fname): name = zfile.namelist()[0] cfile = zfile.open(name) else: + msg = ( + "Unrecognized file extension. " + f"Expected .gzip, .bz2, or .zip, got {extension}" + ) raise ValueError( - f"Unrecognized file extension. Expected .gzip, .bz2, or .zip, got {extension}", + msg, ) contents = cfile.read() cfile.close() return contents -def _read_file(fname): +def _read_file(fname: str | Path | StringIO) -> StringIO: """Read file contents, or read from StringIO object.""" if isinstance(fname, StringIO): fname.seek(0) @@ -71,11 +74,15 @@ def _read_file(fname): extension = fname.suffix.casefold() if extension in [".gzip", ".gz", ".bz2", ".zip"]: contents = _open_compressed(fname) - elif extension in [".cnv", ".edf", ".txt", ".ros", ".btl"]: + elif extension in [".cnv", ".edf", ".txt", ".ros", ".btl", ".bl", ".csv"]: contents = fname.read_bytes() else: + msg = ( + "Unrecognized file extension. " + f"Expected .cnv, .edf, .txt, .ros, or .btl got {extension}" + ) raise ValueError( - f"Unrecognized file extension. Expected .cnv, .edf, .txt, .ros, or .btl got {extension}", + msg, ) # Read as bytes but we need to return strings for the parsers. encoding = chardet.detect(contents)["encoding"] @@ -83,27 +90,34 @@ def _read_file(fname): return StringIO(text) -def _remane_duplicate_columns(names): +def _remane_duplicate_columns(names: str) -> str: """Rename a column when it is duplicated.""" items = collections.Counter(names).items() dup = [] for item, count in items: - if count > 2: + if count > 2: # noqa: PLR2004 + msg = ( + "Cannot handle more than two duplicated columns. " + f"Found {count} for {item}." + ) raise ValueError( - f"Cannot handle more than two duplicated columns. Found {count} for {item}.", + msg, ) if count > 1: dup.append(item) - # since we can assume there are only two instances of a word in the list, how about we find the last - # index of an instance, which will be the second occurrence of the item - second_occurrences = [len(names) - names[::-1].index(item) - 1 for item in dup] + # We can assume there are only two instances of a word in the list, + # we find the last index of an instance, + # which will be the second occurrence of the item. 
+ second_occurrences = [ + len(names) - names[::-1].index(item) - 1 for item in dup + ] for idx in second_occurrences: names[idx] = f"{names[idx]}_" return names -def _parse_seabird(lines, ftype): +def _parse_seabird(lines: list, ftype: str) -> dict: # noqa: C901, PLR0912, PLR0915 """Parse searbird formats.""" # Initialize variables. lon = lat = time = None, None, None @@ -112,15 +126,15 @@ def _parse_seabird(lines, ftype): metadata = {} header, config, names = [], [], [] - for k, line in enumerate(lines): - line = line.strip() + for k, raw_line in enumerate(lines): + line = raw_line.strip() - # Only cnv has columns names, for bottle files we will use the variable row. - if ftype == "cnv": - if "# name" in line: - name, unit = line.split("=")[1].split(":") - name, unit = list(map(_normalize_names, (name, unit))) - names.append(name) + # Only cnv has columns names, + # for bottle files we will use the variable row. + if ftype == "cnv" and "# name" in line: + name, unit = line.split("=")[1].split(":") + name, unit = list(map(_normalize_names, (name, unit))) + names.append(name) # Seabird headers starts with *. if line.startswith("*"): @@ -137,27 +151,32 @@ def _parse_seabird(lines, ftype): if "NMEA Latitude" in line: hemisphere = line[-1] lat = line.strip(hemisphere).split("=")[1].strip() - lat = np.float_(lat.split()) + lat = np.float64(lat.split()) if hemisphere == "S": lat = -(lat[0] + lat[1] / 60.0) elif hemisphere == "N": lat = lat[0] + lat[1] / 60.0 else: - raise ValueError("Latitude not recognized.") + msg = "Latitude not recognized." + raise ValueError(msg) if "NMEA Longitude" in line: hemisphere = line[-1] lon = line.strip(hemisphere).split("=")[1].strip() - lon = np.float_(lon.split()) + lon = np.float64(lon.split()) if hemisphere == "W": lon = -(lon[0] + lon[1] / 60.0) elif hemisphere == "E": lon = lon[0] + lon[1] / 60.0 else: - raise ValueError("Latitude not recognized.") + msg = "Latitude not recognized." + raise ValueError(msg) if "NMEA UTC (Time)" in line: time = line.split("=")[-1].strip() # Should use some fuzzy datetime parser to make this more robust. - time = datetime.strptime(time, "%b %d %Y %H:%M:%S") + time = datetime.datetime.strptime( + time, + "%b %d %Y %H:%M:%S", + ).astimezone(datetime.UTC) # cnv file header ends with *END* while if ftype == "cnv": @@ -174,7 +193,7 @@ def _parse_seabird(lines, ftype): # Fix commonly occurring problem when Sbeox.* exists in the file # the name is concatenated to previous parameter # example: - # CStarAt0Sbeox0Mm/Kg to CStarAt0 Sbeox0Mm/Kg (really two different params) + # CStarAt0Sbeox0Mm/Kg to CStarAt0 Sbeox0Mm/Kg line = re.sub(r"(\S)Sbeox", "\\1 Sbeox", line) names = line.split() @@ -198,10 +217,10 @@ def _parse_seabird(lines, ftype): return metadata -def from_bl(fname): - """Read Seabird bottle-trip (bl) file +def from_bl(fname: str | Path) -> pd.DataFrame: + """Read Seabird bottle-trip (bl) file. - Example + Example: ------- >>> from pathlib import Path >>> import ctd @@ -211,24 +230,24 @@ def from_bl(fname): datetime.datetime(2018, 6, 25, 20, 8, 55) """ - df = pd.read_csv( - fname, + f = _read_file(fname) + cast = pd.read_csv( + f, skiprows=2, parse_dates=[1], index_col=0, names=["bottle_number", "time", "startscan", "endscan"], ) - df._metadata = { + cast._metadata = { # noqa: SLF001 "time_of_reset": pd.to_datetime( linecache.getline(str(fname), 2)[6:-1], ).to_pydatetime(), } - return df + return cast -def from_btl(fname): - """ - DataFrame constructor to open Seabird CTD BTL-ASCII format. 
+def from_btl(fname: str | Path) -> pd.DataFrame: + """DataFrame constructor to open Seabird CTD BTL-ASCII format. Examples -------- @@ -243,7 +262,7 @@ def from_btl(fname): f.seek(0) - df = pd.read_fwf( + cast = pd.read_fwf( f, header=None, index_col=False, @@ -258,22 +277,24 @@ def from_btl(fname): # Also needs date,time,and bottle number to be converted to one per line. # Get row types, see what you have: avg, std, min, max or just avg, std. - rowtypes = df[df.columns[-1]].unique() + rowtypes = cast[cast.columns[-1]].unique() # Get times and dates which occur on second line of each bottle. date_idx = metadata["names"].index("Date") - dates = df.iloc[:: len(rowtypes), date_idx].reset_index(drop=True) - times = df.iloc[1 :: len(rowtypes), date_idx].reset_index(drop=True) + dates = cast.iloc[:: len(rowtypes), date_idx].reset_index(drop=True) + times = cast.iloc[1 :: len(rowtypes), date_idx].reset_index(drop=True) datetimes = dates + " " + times # Fill the Date column with datetimes. - df.loc[:: len(rowtypes), "Date"] = datetimes.values - df.loc[1 :: len(rowtypes), "Date"] = datetimes.values + cast.loc[:: len(rowtypes), "Date"] = datetimes.to_numpy() + cast.loc[1 :: len(rowtypes), "Date"] = datetimes.to_numpy() # Fill missing rows. - df["Bottle"] = df["Bottle"].fillna(method="ffill") - df["Date"] = df["Date"].fillna(method="ffill") + cast["Bottle"] = cast["Bottle"].ffill() + cast["Date"] = cast["Date"].ffill() - df["Statistic"] = df["Statistic"].str.lstrip("(").str.rstrip(")") # (avg) to avg + cast["Statistic"] = ( + cast["Statistic"].str.lstrip("(").str.rstrip(")") + ) # (avg) to avg if "name" not in metadata: name = _basename(fname)[1] @@ -288,26 +309,25 @@ def from_btl(fname): "Statistic": str, "Date": str, } - for column in df.columns: + for column in cast.columns: if column in dtypes: - df[column] = df[column].astype(dtypes[column]) + cast[column] = cast[column].astype(dtypes[column]) else: try: - df[column] = df[column].astype(float) + cast[column] = cast[column].astype(float) except ValueError: warnings.warn( f"Could not convert {column} to float.", stacklevel=2, ) - df["Date"] = pd.to_datetime(df["Date"]) - df._metadata = metadata - return df + cast["Date"] = pd.to_datetime(cast["Date"]) + cast._metadata = metadata # noqa: SLF001 + return cast -def from_edf(fname): - """ - DataFrame constructor to open XBT EDF ASCII format. +def from_edf(fname: str | Path) -> pd.DataFrame: # noqa: C901, PLR0912 + """DataFrame constructor to open XBT EDF ASCII format. 
Examples -------- @@ -320,15 +340,15 @@ def from_edf(fname): """ f = _read_file(fname) header, names = [], [] - for k, line in enumerate(f.readlines()): - line = line.strip() + for k, raw_line in enumerate(f.readlines()): + line = raw_line.strip() if line.startswith("Serial Number"): serial = line.strip().split(":")[1].strip() elif line.startswith("Latitude"): try: hemisphere = line[-1] lat = line.strip(hemisphere).split(":")[1].strip() - lat = np.float_(lat.split()) + lat = np.float64(lat.split()) if hemisphere == "S": lat = -(lat[0] + lat[1] / 60.0) elif hemisphere == "N": @@ -339,7 +359,7 @@ def from_edf(fname): try: hemisphere = line[-1] lon = line.strip(hemisphere).split(":")[1].strip() - lon = np.float_(lon.split()) + lon = np.float64(lon.split()) if hemisphere == "W": lon = -(lon[0] + lon[1] / 60.0) elif hemisphere == "E": @@ -356,18 +376,18 @@ def from_edf(fname): break f.seek(0) - df = pd.read_csv( + cast = pd.read_csv( f, header=None, index_col=None, names=names, skiprows=skiprows, - delim_whitespace=True, + sep=r"\s+", ) f.close() - df.set_index("depth", drop=True, inplace=True) - df.index.name = "Depth [m]" + cast = cast.set_index("depth", drop=True) + cast.index.name = "Depth [m]" name = _basename(fname)[1] metadata = { @@ -377,13 +397,12 @@ def from_edf(fname): "header": "\n".join(header), "serial": serial, } - df._metadata = metadata - return df + cast._metadata = metadata # noqa: SLF001 + return cast -def from_cnv(fname): - """ - DataFrame constructor to open Seabird CTD CNV-ASCII format. +def from_cnv(fname: str | Path) -> pd.DataFrame: + """DataFrame constructor to open Seabird CTD CNV-ASCII format. Examples -------- @@ -399,13 +418,13 @@ def from_cnv(fname): metadata = _parse_seabird(f.readlines(), ftype="cnv") f.seek(0) - df = pd.read_fwf( + cast = pd.read_fwf( f, header=None, index_col=None, names=metadata["names"], skiprows=metadata["skiprows"], - delim_whitespace=True, + sep=r"\s+", widths=[11] * len(metadata["names"]), ) f.close() @@ -422,56 +441,60 @@ def from_cnv(fname): "depSM", "prDE", ] - df.columns = df.columns.str.strip() - prkey = [key for key in prkeys if key in df.columns] + cast.columns = cast.columns.str.strip() + prkey = [key for key in prkeys if key in cast.columns] if len(prkey) == 0: - raise ValueError("Expected one pressure/depth column, didn't receive any") - elif len(prkey) > 1: - # if multiple keys present then keep the first one + msg = "Expected one pressure/depth column, didn't receive any" + raise ValueError( + msg, + ) + if len(prkey) > 1: + # If multiple keys present then keep the first one. prkey = prkey[0] - df.set_index(prkey, drop=True, inplace=True) - df.index.name = "Pressure [dbar]" + cast = cast.set_index(prkey, drop=True) + cast.index.name = "Pressure [dbar]" if prkey == "depSM": lat = metadata.get("lat", None) if lat is not None: - df.index = gsw.p_from_z( - df.index, + cast.index = gsw.p_from_z( + cast.index, lat, geo_strf_dyn_height=0, sea_surface_geopotential=0, ) else: - warnings.war( - f"Missing latitude information. Cannot compute pressure! Your index is {prkey}, " - "please compute pressure manually with `gsw.p_from_z` and overwrite your index.", + msg = ( + "Missing latitude information. Cannot compute pressure! " + f"Your index is {prkey}, please compute pressure manually " + "with `gsw.p_from_z` and overwrite your index." 
) - df.index.name = prkey + warnings.war(msg) + cast.index.name = prkey if "name" not in metadata: name = _basename(fname)[1] metadata["name"] = str(name) dtypes = {"bpos": int, "pumps": bool, "flag": bool} - for column in df.columns: + for column in cast.columns: if column in dtypes: - df[column] = df[column].astype(dtypes[column]) + cast[column] = cast[column].astype(dtypes[column]) else: try: - df[column] = df[column].astype(float) + cast[column] = cast[column].astype(float) except ValueError: warnings.warn( f"Could not convert {column} to float.", stacklevel=2, ) - df._metadata = metadata - return df + cast._metadata = metadata # noqa: SLF001 + return cast -def from_fsi(fname, skiprows=9): - """ - DataFrame constructor to open Falmouth Scientific, Inc. (FSI) CTD +def from_fsi(fname: str | Path, skiprows: int = 9) -> pd.DataFrame: + """DataFrame constructor to open Falmouth Scientific, Inc. (FSI) CTD ASCII format. Examples @@ -485,26 +508,25 @@ def from_fsi(fname, skiprows=9): """ f = _read_file(fname) - df = pd.read_csv( + fsi = pd.read_csv( f, header="infer", index_col=None, skiprows=skiprows, dtype=float, - delim_whitespace=True, + sep=r"\s+", ) f.close() - df.set_index("PRES", drop=True, inplace=True) - df.index.name = "Pressure [dbar]" + fsi = fsi.set_index("PRES", drop=True) + fsi.index.name = "Pressure [dbar]" metadata = {"name": str(fname)} - df._metadata = metadata - return df + fsi._metadata = metadata # noqa: SLF001 + return fsi -def rosette_summary(fname): - """ - Make a BTL (bottle) file from a ROS (bottle log) file. +def rosette_summary(fname: str | Path) -> pd.DataFrame: + """Make a BTL (bottle) file from a ROS (bottle log) file. More control for the averaging process and at which step we want to perform this averaging eliminating the need to read the data into SBE @@ -519,23 +541,24 @@ def rosette_summary(fname): >>> fname = data_path.joinpath("CTD/g01l01s01.ros") >>> ros = ctd.rosette_summary(fname) >>> ros = ros.groupby(ros.index).mean() - >>> ros.pressure.values.astype(int) + >>> ros.pressure.to_numpy().astype(int) array([835, 806, 705, 604, 503, 404, 303, 201, 151, 100, 51, 1]) """ ros = from_cnv(fname) - ros["pressure"] = ros.index.values.astype(float) + ros["pressure"] = ros.index.to_numpy().astype(float) ros["nbf"] = ros["nbf"].astype(int) - ros.set_index("nbf", drop=True, inplace=True, verify_integrity=False) + metadata = ros._metadata # noqa: SLF001 + ros = ros.set_index("nbf", drop=True, verify_integrity=False) + ros._metadata = metadata # noqa: SLF001 return ros -def from_castaway_csv(fname): - """ - DataFrame constructor to open CastAway CSV format. +def from_castaway_csv(fname: str | Path) -> pd.DataFrame: + """DataFrame constructor to open CastAway CSV format. 
- Example - -------- + Example: + ------- >>> import ctd >>> cast = ctd.from_castaway_csv("tests/data/castaway_data.csv") >>> cast.columns @@ -544,27 +567,28 @@ def from_castaway_csv(fname): dtype='object') """ - with open(fname) as file: - f = file.readlines() + f = _read_file(fname) + lines = f.readlines() # Strip newline characters - f = [s.strip() for s in f] + lines = [s.strip() for s in lines] # Separate meta data and CTD profile - meta = [s for s in f if s[0] == "%"][0:-1] - data = [s.split(",") for s in f if s[0] != "%"] - df = pd.DataFrame(data[1:-1], columns=data[0]) + meta = [s for s in lines if s[0] == "%"][0:-1] + data = [s.split(",") for s in lines if s[0] != "%"] + cast = pd.DataFrame(data[1:-1], columns=data[0]) # Convert to numeric - for col in df.columns: - df[col] = pd.to_numeric(df[col]) + for col in cast.columns: + cast[col] = pd.to_numeric(cast[col]) # Normalise column names and extract units - units = [s[s.find("(") + 1 : s.find(")")] for s in df.columns] - df.columns = [ - _normalize_names(s.split("(")[0]).lower().replace(" ", "_") for s in df.columns + units = [s[s.find("(") + 1 : s.find(")")] for s in cast.columns] + cast.columns = [ + _normalize_names(s.split("(")[0]).lower().replace(" ", "_") + for s in cast.columns ] - df.set_index("pressure", drop=True, inplace=True, verify_integrity=False) + cast = cast.set_index("pressure", drop=True, verify_integrity=False) # Add metadata meta = [s.replace("%", "").strip().split(",") for s in meta] @@ -572,6 +596,6 @@ def from_castaway_csv(fname): for line in meta: metadata[line[0]] = line[1] metadata["units"] = units - df._metadata = metadata + cast._metadata = metadata # noqa: SLF001 - return df + return cast diff --git a/docs/source/conf.py b/docs/source/conf.py index 01cbf0f..70a6a47 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -58,11 +58,9 @@ # |version| and |release|, also used in various other places throughout the # built documents. # -from ctd import __version__ as VERSION +from ctd import __version__ -version = VERSION -# The full version, including alpha/beta/rc tags. -release = VERSION +version = release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/notebooks/00-reading-data.ipynb b/notebooks/00-reading-data.ipynb index 43167d3..87e5774 100644 --- a/notebooks/00-reading-data.ipynb +++ b/notebooks/00-reading-data.ipynb @@ -8,10 +8,8 @@ "source": [ "from pathlib import Path\n", "\n", - "import pandas as pd\n", "import ctd\n", "\n", - "\n", "path = Path(\"..\", \"tests\", \"data\")" ] }, @@ -58,7 +56,6 @@ "source": [ "from ctd import rosette_summary\n", "\n", - "\n", "ros = rosette_summary(path.joinpath(\"CTD\", \"g01l01s01.ros\"))\n", "ros = ros.groupby(ros.index).mean()\n", "\n", diff --git a/notebooks/quick_intro.ipynb b/notebooks/quick_intro.ipynb index 0e02e5b..91a6e9b 100644 --- a/notebooks/quick_intro.ipynb +++ b/notebooks/quick_intro.ipynb @@ -17,25 +17,15 @@ }, "outputs": [], "source": [ - "import io\n", - "import requests\n", - "from pathlib import Path\n", + "import pooch\n", "\n", + "test_data = \"CTD-spiked-unfiltered.cnv.bz2\"\n", + "url = f\"https://github.com/pyoceans/python-ctd/raw/main/tests/data/{test_data}\"\n", "\n", - "def download_demo_file(url):\n", - " if not Path(\"CTD-spiked-unfiltered.cnv.bz2\").exists():\n", - " response = requests.get(url, allow_redirects=True)\n", - " try:\n", - " response.raise_for_status()\n", - " except requests.exceptions.HTTPError as err:\n", - " raise requests.exceptions.HTTPError(f\"{response.content.decode()}\") from err\n", - " data = io.BytesIO(response.content)\n", - " data.seek(0)\n", - " Path(\"CTD-spiked-unfiltered.cnv.bz2\").write_bytes(data.read())\n", - "\n", - "\n", - "url = \"https://github.com/pyoceans/python-ctd/raw/main/tests/data/CTD-spiked-unfiltered.cnv.bz2\"\n", - "download_demo_file(url)" + "fname = pooch.retrieve(\n", + " url=url,\n", + " known_hash=\"sha256:1de4b7ce665d5cece925c5feb4552c13bbc19cef3e229bc87dfd77acb1a730d3\",\n", + ")" ] }, { @@ -46,8 +36,7 @@ "source": [ "import ctd\n", "\n", - "\n", - "cast = ctd.from_cnv(\"CTD-spiked-unfiltered.cnv.bz2\")\n", + "cast = ctd.from_cnv(fname)\n", "down, up = cast.split()\n", "\n", "down.head()" @@ -84,7 +73,6 @@ "source": [ "from matplotlib import style\n", "\n", - "\n", "style.use(\"seaborn-v0_8-whitegrid\")\n", "\n", "down[\"t090C\"].plot_cast()\n", @@ -169,7 +157,6 @@ "source": [ "import matplotlib.pyplot as plt\n", "\n", - "\n", "fig, ax = plt.subplots()\n", "ax.plot(down.index, label=\"unfiltered\")\n", "ax.plot(down.lp_filter().index, label=\"filtered\")\n", @@ -233,7 +220,6 @@ "source": [ "import gsw\n", "\n", - "\n", "p = proc.index\n", "\n", "SP = gsw.SP_from_C(proc[\"c0S/m\"].to_numpy() * 10.0, proc[\"t090C\"].to_numpy(), p)\n", @@ -318,7 +304,10 @@ "ax1.set_xlabel(\"Absolute Salinity (g kg$^{-1}$)\")\n", "\n", "(l2,) = ax2.plot(\n", - " proc[\"sigma0_CT\"], proc.index, color=colors[2], label=r\"$\\sigma_{0\\_CT}$\"\n", + " proc[\"sigma0_CT\"],\n", + " proc.index,\n", + " color=colors[2],\n", + " label=r\"$\\sigma_{0\\_CT}$\",\n", ")\n", "ax2.set_xlabel(r\"$\\sigma_{0\\_CT}$ (kg m$^{-3}$)\")\n", "\n", @@ -360,7 +349,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.12.3" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index c2a9344..c3b453c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,11 +13,9 @@ license = { text = "BSD-3-Clause" } authors = [ { name = "Filipe Fernandes", email = "ocefpaf+ctd@gmail.com" }, ] -requires-python = ">=3.9" +requires-python = ">=3.11" classifiers = [ "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.9", - "Programming Language :: 
Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] diff --git a/requirements-dev.txt b/requirements-dev.txt index 736a9ad..1a84fe8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,26 +1,12 @@ -black check-manifest -doctr -flake8 -flake8-builtins -flake8-comprehensions -flake8-mutable -flake8-print -ipykernel -isort -jupyter -jupyter_client mypy +nbclassic nbconvert nbsphinx +pooch pre-commit -pycodestyle -pylint pytest pytest-cov -pytest-flake8 -pytest-xdist setuptools_scm sphinx twine -wheel diff --git a/requirements.txt b/requirements.txt index e722ff8..a8cf98b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ chardet gsw>=3.3.0 matplotlib -numpy +numpy>=2 pandas>=0.24.0 pandas-flavor>=0.1.2 scipy diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..cf3061a --- /dev/null +++ b/ruff.toml @@ -0,0 +1,53 @@ +line-length = 80 + +lint.select = ["ALL"] + +lint.ignore = [ + "D203", # 1 blank line required before class docstring + "D205", # 1 blank line required between summary line and description + "D213", # incompatible. Ignoring `multi-line-summary-second-line` + "TRY003", # Avoid specifying long messages outside the exception class +] + +[lint.extend-per-file-ignores] +"docs/source/conf.py" = [ + "A001", # builtin-variable-shadowing + "D100", # Missing docstring in public module + "E402", # Module level import not at top of file + "ERA001", # Found commented-out code + "ERA001", # Found commented-out code + "EXE001", # Shebang is present but file is not executable +] +"test_*.py" = [ + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "ANN202", # Missing return type annotation for private function + "INP001", # File is part of an implicit namespace package + "PD901", # Avoid using the generic variable name `df` for DataFrames + "S101", # Use of assert detected + "ANN002", # Missing type annotation for `*args` + "ANN003", # Missing type annotation for `**kwargs` +] +"ctd/extras.py" = [ + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "ANN202", # Missing return type annotation for private function +] +# nbqa-ruff acts on converted .py so we cannot glob .ipynb :-/ +# https://github.com/nbQA-dev/nbQA/issues/823 +"notebooks/*" = [ + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "B018", # Found useless expression. 
Either assign it to a variable or remove it + "D100", # Missing docstring in public module + "D103", # Missing docstring in public function + "E402", # Module level import not at top of file + "FBT003", # Boolean positional value in function call + "INP001", # File is part of an implicit namespace package + "N816", # Variable in global scope should not be mixedCase + "PD901", # Avoid using the generic variable name `df` for DataFrames + "SLF001", # Private member accessed + "T201", # `print` found" +] +[lint.pycodestyle] +max-doc-length = 180 diff --git a/tests/test_plotting.py b/tests/test_plotting.py index 89edafa..d1b5230 100644 --- a/tests/test_plotting.py +++ b/tests/test_plotting.py @@ -1,13 +1,15 @@ +"""Test plotting.""" + from pathlib import Path -import matplotlib +import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pytest import ctd -matplotlib.use("Agg") +mpl.use("Agg") data_path = Path(__file__).parent.joinpath("data") @@ -24,7 +26,8 @@ def _assert_is_valid_plot_return_object(objs): else: assert isinstance(objs, (plt.Artist, tuple, dict)), ( "objs is neither an ndarray of Artist instances nor a " - f'single Artist instance, tuple, or dict, "objs" is a {objs.__class__.__name__!r} ' + "single Artist instance, tuple, or dict, " + f'"objs" is a {objs.__class__.__name__!r} ' "" ) @@ -36,28 +39,38 @@ def _check_plot_works(f, *args, **kwargs): plt.close() -# BasicPlotting. -@pytest.fixture +# Basic Plotting. +@pytest.fixture() def xbt(): + """Load XBT.""" yield ctd.from_edf(data_path.joinpath("XBT.EDF.zip")) plt.close("all") -@pytest.fixture +@pytest.fixture() def fsi(): + """Load FSI.""" yield ctd.from_fsi(data_path.joinpath("FSI.txt.gz"), skiprows=9) plt.close("all") -@pytest.fixture +@pytest.fixture() def cnv(): + """Load CNV.""" yield ctd.from_cnv(data_path.joinpath("small.cnv.bz2")) plt.close("all") def test_xbt_plot(xbt): + """Test plotting XBT.""" _check_plot_works(xbt["temperature"].plot_cast) +def test_fsi_temperature(fsi): + """Test plotting CNV.""" + _check_plot_works(fsi["TEMP"].plot_cast) + + def test_cnv_temperature(cnv): + """Test plotting CNV.""" _check_plot_works(cnv["t090C"].plot_cast) diff --git a/tests/test_processing.py b/tests/test_processing.py index 2d9b651..2e2f781 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -1,45 +1,52 @@ +"""Test processing methods.""" + import numpy as np import pandas as pd import pytest -import ctd # noqa - -@pytest.fixture +@pytest.fixture() def series(): + """Load data series.""" index = np.r_[np.linspace(-5, 10, 20), np.linspace(10, -5, 20)] - yield pd.Series(data=np.arange(len(index)), index=index) + return pd.Series(data=np.arange(len(index)), index=index) -@pytest.fixture +@pytest.fixture() def df(): + """Load data frame.""" index = np.r_[np.linspace(-5, 10, 20), np.linspace(10, -5, 20)] - yield pd.DataFrame(data=np.arange(len(index)), index=index) + return pd.DataFrame(data=np.arange(len(index)), index=index) def test_remove_above_water_series(series): + """Test remove above water series.""" assert any(series.index < 0) assert not any(series.remove_above_water().index < 0) def test_remove_above_water_df(df): + """Test remove above water dataframe.""" assert any(df.index < 0) assert not any(df.remove_above_water().index < 0) def test_remove_up_to_series(series): + """Test remove up to series.""" idx = 10 assert any(series.index < idx) assert not any(series.remove_up_to(idx=idx).index < idx) def test_remove_up_to_df(df): + """Test remove up dataframe.""" idx = 10 assert 
any(df.index < idx) assert not any(df.remove_up_to(idx=idx).index < idx) def test_split_series(series): + """Test split series.""" split = series.split() down, up = split assert isinstance(split, tuple) @@ -47,6 +54,7 @@ def test_split_series(series): def test_split_df(df): + """Test split dataframe.""" split = df.split() down, up = split assert isinstance(split, tuple) @@ -54,18 +62,26 @@ def test_split_df(df): def test_press_check_series(series): - # reverse 7th and 9th and confirm they are removed after the `press_check`. + """Test pressure check series. + + Reverse 7th and 9th and confirm they are removed after the `press_check`. + """ index = [0, 1, 2, 3, 4, 5, 7, 6, 9, 8, 10] - series = pd.Series(data=np.random.randn(len(index)), index=index) + rng = np.random.default_rng() + series = pd.Series(data=rng.standard_normal(len(index)), index=index) series = series.press_check() assert np.isnan(series.iloc[7]) assert np.isnan(series.iloc[9]) def test_press_check_df(df): - # reverse 7th and 9th and confirm they are removed after the `press_check`. + """Test pressure check dataframe. + + Reverse 7th and 9th and confirm they are removed after the `press_check`. + """ index = [0, 1, 2, 3, 4, 5, 7, 6, 9, 8, 10] - arr = np.random.randn(len(index)) + rng = np.random.default_rng() + arr = rng.standard_normal(len(index)) df = pd.DataFrame(data=np.c_[arr, arr], index=index) df = df.press_check() assert np.isnan(df.iloc[7]).all() @@ -73,12 +89,13 @@ def test_press_check_df(df): def test_bindata_average(series): + """Test bin data.""" delta = 1.0 index = series.remove_above_water().split()[0].bindata(delta=delta).index - assert all(index.values == np.arange(1, 9, delta) + delta / 2) - assert np.unique(np.diff(index.values)) == delta + assert all(index.to_numpy() == np.arange(1, 9, delta) + delta / 2) + assert np.unique(np.diff(index.to_numpy())) == delta delta = 2 index = series.remove_above_water().split()[0].bindata(delta=delta).index - assert all(index.values == np.arange(1, 9, delta) + delta / 2) - assert np.unique(np.diff(index.values)) == delta + assert all(index.to_numpy() == np.arange(1, 9, delta) + delta / 2) + assert np.unique(np.diff(index.to_numpy())) == delta diff --git a/tests/test_processing_real_data.py b/tests/test_processing_real_data.py index 72f4043..4fd2835 100644 --- a/tests/test_processing_real_data.py +++ b/tests/test_processing_real_data.py @@ -1,3 +1,5 @@ +"""Test processing real data.""" + from pathlib import Path import numpy as np @@ -8,42 +10,50 @@ data_path = Path(__file__).parent.joinpath("data") -@pytest.fixture +@pytest.fixture() def spiked_ctd(): - yield ctd.from_cnv(data_path.joinpath("CTD-spiked-unfiltered.cnv.bz2")) + """Load spiked CTD.""" + return ctd.from_cnv(data_path.joinpath("CTD-spiked-unfiltered.cnv.bz2")) -@pytest.fixture +@pytest.fixture() def filtered_ctd(): - yield ctd.from_cnv(data_path.joinpath("CTD-spiked-filtered.cnv.bz2")) + """Load spiked-filtered CTD.""" + return ctd.from_cnv(data_path.joinpath("CTD-spiked-filtered.cnv.bz2")) def test_despike_real_data(filtered_ctd): + """Test despike.""" # Looking at downcast only. 
dirty = filtered_ctd["c0S/m"].split()[0] clean = dirty.despike(n1=2, n2=20, block=500) - spikes = clean.isnull() + spikes = clean.isna() equal = (dirty[~spikes] == clean[~spikes]).all() - assert spikes.any() and equal + assert spikes.any() + assert equal def test_lp_filter_real_data(spiked_ctd, filtered_ctd): + """Test low pass filter.""" kw = {"sample_rate": 24.0, "time_constant": 0.15} - expected = filtered_ctd.index.values + expected = filtered_ctd.index.to_numpy() filtered = spiked_ctd.lp_filter(**kw).index # Caveat: Not really a good test... np.testing.assert_almost_equal(filtered, expected, decimal=1) def test_press_check_real_data(spiked_ctd): + """Test pressure check.""" unchecked = spiked_ctd["t090C"] press_checked = unchecked.press_check() - reversals = press_checked.isnull() + reversals = press_checked.isna() equal = (unchecked[~reversals] == press_checked[~reversals]).all() - assert reversals.any() and equal + assert reversals.any() + assert equal def test_processing_chain_spiked_ctd(spiked_ctd): + """Test all processing steps chained.""" down, up = spiked_ctd.remove_above_water().split() temp = down["t090C"] # despike is a series only method temp = ( diff --git a/tests/test_read.py b/tests/test_read.py index f81d51a..d0e3bbb 100644 --- a/tests/test_read.py +++ b/tests/test_read.py @@ -1,4 +1,6 @@ -from io import StringIO +"""Test reading functionality.""" + +import io from pathlib import Path import numpy as np @@ -13,118 +15,142 @@ # Test `_read_file` and `_open_compressed`. def test_zip(): + """Test reading from zip.""" cfile = _read_file(data_path.joinpath("XBT.EDF.zip")) - assert isinstance(cfile, StringIO) + assert isinstance(cfile, io.StringIO) def test_gzip(): + """Test reading from gzip.""" cfile = _read_file(data_path.joinpath("XBT.EDF.gz")) - assert isinstance(cfile, StringIO) + assert isinstance(cfile, io.StringIO) def test_bz2(): + """Test reading from bzip2.""" cfile = _read_file(data_path.joinpath("XBT.EDF.bz2")) - assert isinstance(cfile, StringIO) + assert isinstance(cfile, io.StringIO) def test_uncompresed(): + """Test reading from uncompressed file.""" cfile = _read_file(data_path.joinpath("XBT.EDF")) - assert isinstance(cfile, StringIO) + assert isinstance(cfile, io.StringIO) # Test ctd DataFrame. 
-@pytest.fixture +@pytest.fixture() def xbt(): - yield ctd.from_edf(data_path.joinpath("XBT.EDF.zip")) + """Load zip EDF file.""" + return ctd.from_edf(data_path.joinpath("XBT.EDF.zip")) -@pytest.fixture +@pytest.fixture() def fsi(): - yield ctd.from_fsi(data_path.joinpath("FSI.txt.gz"), skiprows=9) + """Load gzip FSI file.""" + return ctd.from_fsi(data_path.joinpath("FSI.txt.gz"), skiprows=9) -@pytest.fixture +@pytest.fixture() def cnv(): - yield ctd.from_cnv(data_path.joinpath("small.cnv.bz2")) + """Load bzip2 CNV file.""" + return ctd.from_cnv(data_path.joinpath("small.cnv.bz2")) -@pytest.fixture +@pytest.fixture() def btl(): - yield ctd.from_btl(data_path.joinpath("btl", "bottletest.btl")) + """Load uncompressed BTL file.""" + return ctd.from_btl(data_path.joinpath("btl", "bottletest.btl")) -@pytest.fixture +@pytest.fixture() def btl_as_stream(): - file = open(mode="rb", file=data_path.joinpath("btl", "alt_bottletest.BTL")) - stream = StringIO(file.read().decode("cp1252")) - yield ctd.from_btl(stream) + """Load stream BTL data.""" + with Path.open( + data_path.joinpath("btl", "alt_bottletest.BTL"), + mode="rb", + ) as f: + stream = io.StringIO(f.read().decode("cp1252")) + return ctd.from_btl(stream) -@pytest.fixture +@pytest.fixture() def ros(): - yield ctd.rosette_summary(data_path.joinpath("CTD", "g01l03s01m-m2.ros")) + """Load uncompressed ROS file.""" + return ctd.rosette_summary(data_path.joinpath("CTD", "g01l03s01m-m2.ros")) def test_xbt_is_dataframe(xbt): + """Test XBT.""" assert isinstance(xbt, pd.DataFrame) assert not xbt.empty def test_fsi_is_dataframe(fsi): + """Test FSI.""" assert isinstance(fsi, pd.DataFrame) assert not fsi.empty def test_cnv_is_dataframe(cnv): + """Test CNV.""" assert isinstance(cnv, pd.DataFrame) assert not cnv.empty def test_btl_is_dataframe(btl): + """Test BTL.""" assert isinstance(btl, pd.DataFrame) assert not btl.empty def test_btl_with_dup_cols(btl_as_stream): + """Test BTL with duplicated columns.""" assert all(col in btl_as_stream.columns for col in ["Bottle", "Bottle_"]) def test_btl_as_stringio(btl_as_stream): + """Test BTL from stream.""" assert isinstance(btl_as_stream, pd.DataFrame) assert not btl_as_stream.empty def test_ros_is_dataframe(ros): + """Test ROS.""" assert isinstance(ros, pd.DataFrame) assert not ros.empty -# if missing the 'File Name' in the header the dataframe._metadata['name'] should be set to 'unknown' -def test_ros_no_file_name(ros): - file = open(mode="rb", file=data_path.joinpath("CTD", "fixstation_hl_02.ros")) - stream = StringIO(file.read().decode("cp1252")) +def test_ros_no_file_name(): + """Test is if missing the 'File Name' is set to 'unknown'.""" + with Path.open( + data_path.joinpath("CTD", "fixstation_hl_02.ros"), + mode="rb", + ) as f: + stream = io.StringIO(f.read().decode("cp1252")) data = ctd.rosette_summary(stream) - assert data._metadata["name"] == "unknown" + assert data._metadata["name"] == "unknown" # noqa: SLF001 -# HeaderTest. def test_header_parse(): - # file with missing positions + """Test header parsing.""" + # File with missing positions. xbt = ctd.from_edf(data_path.joinpath("C3_00005.edf")) - assert xbt._metadata["lon"] is None - assert xbt._metadata["lat"] is None + assert xbt._metadata["lon"] is None # noqa: SLF001 + assert xbt._metadata["lat"] is None # noqa: SLF001 - # file with valid positions + # File with valid positions. 
xbt = ctd.from_edf(data_path.joinpath("XBT.EDF")) - np.testing.assert_almost_equal(xbt._metadata["lon"], -39.8790283) - np.testing.assert_almost_equal(xbt._metadata["lat"], -19.7174805) + np.testing.assert_almost_equal(xbt._metadata["lon"], -39.8790283) # noqa: SLF001 + np.testing.assert_almost_equal(xbt._metadata["lat"], -19.7174805) # noqa: SLF001 def test_header_parse_blank_line(): - # check that a BTL file can still be loaded if the header section contains blank lines + """Check if file is loaded when the header section contains blank lines. - # if the blank line in the header causes the reader to exit before reading the file - # the line looking for the Date in the ctd.from_btl() will throw a ValueError. + If the blank line in the header causes to exit before reading it, + the line looking for the Date in the `from_btl` will throw a ValueError. + """ btl = ctd.from_btl( data_path.joinpath( "btl", @@ -132,17 +158,16 @@ def test_header_parse_blank_line(): ), ) - # if a value error wasn't thrown, ensure the names array for the _metadata was set - assert btl._metadata["names"].index("Date") + assert btl._metadata["names"].index("Date") # noqa: SLF001 def test_pressure_field_labels(): - """ - Support different pressure field labels encountered in Sea-Bird cnv files. - - """ + """Support different pressure field labels encountered in CNV files.""" for fname in sorted(data_path.glob("press-pass*.cnv")): ctd.from_cnv(fname) for fname in sorted(data_path.glob("press-fails*.cnv")): - with pytest.raises(ValueError): + with pytest.raises( + ValueError, + match="Expected one pressure/depth column, didn't receive any", + ): ctd.from_cnv(fname)
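
For context, a minimal sketch of how the refactored API reads end to end after this change. It assumes the snippet is run from the repository root so the bundled file under tests/data/ resolves; the column name and processing parameters simply repeat values already exercised in the tests and notebook above, and the chaining order is illustrative rather than prescriptive.

from pathlib import Path

import ctd

# Bundled test file shipped with the repository (path is an assumption;
# adjust if running outside the repo root).
fname = Path("tests", "data", "CTD-spiked-unfiltered.cnv.bz2")

cast = ctd.from_cnv(fname)

# Drop the above-water scans and keep only the downcast.
down, up = cast.remove_above_water().split()

# Series-level cleanup using the defaults shown in the diff:
# despike (wild edit), low-pass filter the pressure index,
# remove pressure reversals, then bin-average to 1 dbar.
temperature = (
    down["t090C"]
    .despike(n1=2, n2=20, block=100)
    .lp_filter(sample_rate=24.0, time_constant=0.15)
    .press_check()
    .bindata(delta=1.0)
)

# plot_cast puts the pressure index on the y-axis and returns the axes.
ax = temperature.plot_cast()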