Fixed missing parameter definitions in docstrings

seanlaw · seanlaw · commit ac238b61897d · 2022-12-03T05:23:45.000-05:00
diff --git a/docstring.py b/docstring.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+import pathlib
+import ast
+import re
+
+
+def get_docstring_args(docstring):
+    """
+    Return the names found in `name :` entries of `docstring` (a str or None),
+    excluding "self"; a missing docstring (None) yields an empty list
+    """
+    args = re.findall(r"(\w+)\s+\:", docstring or "")
+    return [a for a in args if a != "self"]
+
+
+ignore = ["__init__.py", "__pycache__"]
+
+stumpy_path = pathlib.Path(__file__).parent / "stumpy"
+filepaths = sorted(f for f in pathlib.Path(stumpy_path).iterdir() if f.is_file())
+for filepath in filepaths:
+    if filepath.name not in ignore and str(filepath).endswith(".py"):
+        file_contents = ""
+        with open(filepath, encoding="utf8") as f:
+            file_contents = f.read()
+        module = ast.parse(file_contents)
+
+        # Check top-level functions for args without a docstring entry
+        function_definitions = [
+            node for node in module.body if isinstance(node, ast.FunctionDef)
+        ]
+        for fd in function_definitions:
+            docstring_args = set(get_docstring_args(ast.get_docstring(fd)))
+            signature_args = set([a.arg for a in fd.args.args])
+            diff_args = signature_args.difference(docstring_args)
+            if len(diff_args) > 0:
+                print("Found one or more parameters with missing docstring:")
+                print(f"    File: {filepath.name}")
+                print(f"    Function: {fd.name}")
+                print(f"    Parameters: {diff_args}")
+                # print(ast.get_docstring(fd))
+                # print(docstring_args)
+                # print(signature_args)
+
+        # Check methods of top-level classes ("self" is excluded)
+        class_definitions = [
+            node for node in module.body if isinstance(node, ast.ClassDef)
+        ]
+        for cd in class_definitions:
+            methods = [node for node in cd.body if isinstance(node, ast.FunctionDef)]
+            for fd in methods:
+                docstring_args = set(get_docstring_args(ast.get_docstring(fd)))
+                signature_args = set([a.arg for a in fd.args.args if a.arg != "self"])
+                diff_args = signature_args.difference(docstring_args)
+                if len(diff_args) > 0:
+                    print("Found one or more parameters with missing docstring:")
+                    print(f"    File: {filepath.name}")
+                    print(f"    Class: {cd.name}")
+                    print(f"    Method: {fd.name}")
+                    print(f"    Parameters: {diff_args}")
+                    # print(ast.get_docstring(fd))
+                    # print(docstring_args)
+                    # print(signature_args)
diff --git a/stumpy/aamp_mmotifs.py b/stumpy/aamp_mmotifs.py
@@ -33,70 +33,70 @@ def aamp_mmotifs(
 
     Parameters
     ----------
-    T: numpy.ndarray
+    T : numpy.ndarray
         The multi-dimensional time series or sequence
 
-    P: numpy.ndarray
+    P : numpy.ndarray
         Multi-dimensional Matrix Profile of T
 
-    I: numpy.ndarray
+    I : numpy.ndarray
         Multi-dimensional Matrix Profile indices
 
     min_neighbors : int, default 1
         The minimum number of similar matches a subsequence needs to have in order
         to be considered a motif. This defaults to `1`, which means that a subsequence
         must have at least one similar match in order to be considered a motif.
 
-    max_distance: flaot, default None
+    max_distance : float, default None
         Maximal distance that is allowed between a query subsequence
         (a candidate motif) and all subsequences in T to be considered as a match.
         If None, this defaults to
         `np.nanmax([np.nanmean(D) - 2 * np.nanstd(D), np.nanmin(D)])`
         (i.e. at least the closest match will be returned).
 
-    cutoffs: numpy.ndarray or float, default None
+    cutoffs : numpy.ndarray or float, default None
         The largest matrix profile value (distance) for each dimension of the
         multidimensional matrix profile that a multidimenisonal candidate motif is
         allowed to have. If `cutoffs` is a scalar value, then this value will be
         applied to every dimension.
 
-    max_matches: int, default 10
+    max_matches : int, default 10
         The maximum number of similar matches (nearest neighbors) to return for each
         motif. The first match is always the self/trivial-match for each motif.
 
-    max_motifs: int, default 1
+    max_motifs : int, default 1
         The maximum number of motifs to return
 
-    atol: float, default 1e-8
+    atol : float, default 1e-8
         The absolute tolerance parameter. This value will be added to `max_distance`
         when comparing distances between subsequences.
 
-    k: int, default None
+    k : int, default None
         The number of dimensions (`k + 1`) required for discovering all motifs. This
         value is available for doing guided search or, together with `include`, for
         constrained search. If `k is None`, then this will be automatically be computed
         for each motif using MDL (unconstrained search).
 
-    include: numpy.ndarray, default None
+    include : numpy.ndarray, default None
         A list of (zero based) indices corresponding to the dimensions in T that must be
         included in the constrained multidimensional motif search.
 
-    p: float, default 2.0
+    p : float, default 2.0
         The p-norm to apply for computing the Minkowski distance.
 
     Returns
     -------
-    motif_distances: numpy.ndarray
+    motif_distances : numpy.ndarray
         The distances corresponding to a set of subsequence matches for each motif.
 
-    motif_indices: numpy.ndarray
+    motif_indices : numpy.ndarray
         The indices corresponding to a set of subsequences matches for each motif.
 
-    motif_subspaces: list
+    motif_subspaces : list
         A list consisting of arrays that contain the `k`-dimensional
         subspace for each motif.
 
-    motif_mdls: list
+    motif_mdls : list
         A list consisting of arrays that contain the mdl results for
         finding the dimension of each motif
 
diff --git a/stumpy/aamp_stimp.py b/stumpy/aamp_stimp.py
@@ -550,6 +550,9 @@ def __init__(
 
         step : int, default 1
             The step between subsequence window sizes
+
+        p : float, default 2.0
+            The p-norm to apply for computing the Minkowski distance.
         """
         super().__init__(
             T,
diff --git a/stumpy/aampi.py b/stumpy/aampi.py
@@ -168,6 +168,11 @@ def _update_egress(self, t):
         """
         Ingress a new data point, egress the oldest data point, and update the matrix
         profile and matrix profile indices
+
+        Parameters
+        ----------
+        t : float
+            A single new data point to be appended to `T`
         """
         self._n = self._T.shape[0]
         l = self._n - self._m + 1 - 1  # Subtract 1 due to egress
@@ -245,6 +250,11 @@ def _update(self, t):
         """
         Ingress a new data point and update the (top-k) matrix profile and matrix
         profile indices without egressing the oldest data point
+
+        Parameters
+        ----------
+        t : float
+            A single new data point to be appended to `T`
         """
         self._n = self._T.shape[0]
         l = self._n - self._m + 1
diff --git a/stumpy/core.py b/stumpy/core.py
@@ -2750,19 +2750,19 @@ def _shift_insert_at_index(a, idx, v, shift="right"):
 
     Parameters
     ----------
-    a: numpy.ndarray
+    a : numpy.ndarray
         A 1d array
 
-    idx: int
+    idx : int
         The index at which the value `v` should be inserted. This can be any
         integer number from `0` to `len(a)`. When `idx=len(a)` and `shift="right"`,
         OR when `idx=0` and `shift="left"`, then no change will occur on
         the input array `a`.
 
-    v: float
+    v : float
         The value that should be inserted into array `a` at index `idx`
 
-    shift: str, default "right"
+    shift : str, default "right"
         The value that indicates whether the shifting of elements should be towards
         the right or left. If `shift="right"` (default), all elements in `a[idx:]`
         are shifted to the right by one element. If `shift="left"`, all elements
diff --git a/stumpy/gpu_aamp.py b/stumpy/gpu_aamp.py
@@ -115,6 +115,14 @@ def _compute_and_update_PI_kernel(
     compute_p_norm : bool
         A boolean flag for whether or not to compute the p-norm
 
+    bfs : numpy.ndarray
+        The breadth-first-search indices where the missing leaves of its corresponding
+        binary search tree are filled with -1.
+
+    nlevel : int
+        The number of levels in the binary search tree from which the array
+        `bfs` is obtained.
+
     k : int
         The number of top `k` smallest distances used to construct the matrix profile.
         Note that this will increase the total computational time and memory usage
diff --git a/stumpy/maamp.py b/stumpy/maamp.py
@@ -721,7 +721,7 @@ def _maamp(
 
     Parameters
     ----------
-    T: numpy.ndarray
+    T : numpy.ndarray
         The time series or sequence for which to compute the multi-dimensional
         matrix profile
 
diff --git a/stumpy/mmotifs.py b/stumpy/mmotifs.py
@@ -33,51 +33,51 @@ def mmotifs(
 
     Parameters
     ----------
-    T: numpy.ndarray
+    T : numpy.ndarray
         The multi-dimensional time series or sequence
 
-    P: numpy.ndarray
+    P : numpy.ndarray
         Multi-dimensional Matrix Profile of T
 
-    I: numpy.ndarray
+    I : numpy.ndarray
         Multi-dimensional Matrix Profile indices
 
     min_neighbors : int, default 1
         The minimum number of similar matches a subsequence needs to have in order
         to be considered a motif. This defaults to `1`, which means that a subsequence
         must have at least one similar match in order to be considered a motif.
 
-    max_distance: flaot, default None
+    max_distance : float, default None
         Maximal distance that is allowed between a query subsequence
         (a candidate motif) and all subsequences in T to be considered as a match.
         If None, this defaults to
         `np.nanmax([np.nanmean(D) - 2 * np.nanstd(D), np.nanmin(D)])`
         (i.e. at least the closest match will be returned).
 
-    cutoffs: numpy.ndarray or float, default None
+    cutoffs : numpy.ndarray or float, default None
         The largest matrix profile value (distance) for each dimension of the
         multidimensional matrix profile that a multidimenisonal candidate motif is
         allowed to have. If `cutoffs` is a scalar value, then this value will be
         applied to every dimension.
 
-    max_matches: int, default 10
+    max_matches : int, default 10
         The maximum number of similar matches (nearest neighbors) to return for each
         motif. The first match is always the self/trivial-match for each motif.
 
-    max_motifs: int, default 1
+    max_motifs : int, default 1
         The maximum number of motifs to return
 
     atol : float, default 1e-8
         The absolute tolerance parameter. This value will be added to `max_distance`
         when comparing distances between subsequences.
 
-    k: int, default None
+    k : int, default None
         The number of dimensions (`k + 1`) required for discovering all motifs. This
         value is available for doing guided search or, together with `include`, for
         constrained search. If `k is None`, then this will be automatically be computed
         for each motif using MDL (unconstrained search).
 
-    include: numpy.ndarray, default None
+    include : numpy.ndarray, default None
         A list of (zero based) indices corresponding to the dimensions in T that must be
         included in the constrained multidimensional motif search.
 
diff --git a/stumpy/mstump.py b/stumpy/mstump.py
@@ -1007,7 +1007,7 @@ def _mstump(
 
     Parameters
     ----------
-    T: numpy.ndarray
+    T : numpy.ndarray
         The time series or sequence for which to compute the multi-dimensional
         matrix profile
 
diff --git a/stumpy/scraamp.py b/stumpy/scraamp.py
@@ -331,7 +331,7 @@ def _prescraamp(
     p : float, default 2.0
         The p-norm to apply for computing the Minkowski distance.
 
-    i : int
+    indices : int
         The subsequence index in `T_B` that corresponds to `Q`
 
     s : int
diff --git a/stumpy/stumpi.py b/stumpy/stumpi.py
@@ -199,6 +199,11 @@ def _update_egress(self, t):
         """
         Ingress a new data point, egress the oldest data point, and update the (top-k)
         matrix profile and matrix profile indices
+
+        Parameters
+        ----------
+        t : float
+            A single new data point to be appended to `T`
         """
         self._n = self._T.shape[0]
         l = self._n - self._m + 1 - 1  # Subtract 1 due to egress
@@ -278,6 +283,11 @@ def _update(self, t):
         """
         Ingress a new data point and update the (top-k) matrix profile and matrix
         profile indices without egressing the oldest data point
+
+        Parameters
+        ----------
+        t : float
+            A single new data point to be appended to `T`
         """
         n = self._T.shape[0]
         l = n - self._m + 1
diff --git a/test.sh b/test.sh
@@ -202,7 +202,7 @@ test_coverage()
         coverage run --append --source=. -m pytest -x -W ignore::RuntimeWarning -W ignore::DeprecationWarning $testfile
         check_errs $?
     done
-    coverage report -m --skip-covered --omit=setup.py
+    coverage report -m --skip-covered --omit=setup.py,docstring.py
 }
 
 check_links()