Replace asserts with torchdrift.utils.check, which raises RuntimeError

t-vi · t-vi · commit ff85ff052262 · 2021-03-07T21:21:40.000+01:00
diff --git a/notebooks/deployment_monitoring_example.ipynb b/notebooks/deployment_monitoring_example.ipynb
@@ -397,14 +397,6 @@
    "source": [
     "So in this notebook we saw how to use model hooks with the drift detector to automatically set of the alarm when something bad happens. Just remember that if you set the p-value to $x\\%$ you expect to get a false alarm every $100\\%/x\\%$ batches to not spam your emergency contact."
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "careful-exposure",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/notebooks/drift_detection_overview.ipynb b/notebooks/drift_detection_overview.ipynb
@@ -282,14 +282,6 @@
     "\n",
     "To make this operational, we can get out our toolbox of classifiers, e.g. Neural Networks and Nearest-Neighbor ones, see [D. Lopez-Paz, M. Oquab: Revisiting classifier two-sample tests, ICLR 2017](https://arxiv.org/abs/1610.06545). Note that this approach can be data-intensive: To execute, we need to split the samples $x^{ref}_i$ and $x_i$ into train and test samples. When using neural networks, we also need to train the classifier, adding computational requirements. When we have enough data and time, we may hope that such a classification-based approach may be highly effective.\n"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "loaded-quebec",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
diff --git a/test/test_corruption_functions.py b/test/test_corruption_functions.py
@@ -39,7 +39,9 @@ def test_gaussian_blur():
     a1 = torchdrift.data.functional.gaussian_blur(a, severity=5)
     a2 = scipy.ndimage.gaussian_filter(a, [0, 0, 6, 6])
     assert ((a1 - a2)[:, :, 32:-32, 32:-32]).max().abs() < 1e-2
-
+    with pytest.raises(RuntimeError):
+        a3 = torchdrift.data.functional.gaussian_blur(a, severity=6)
+        
 
 if __name__ == "__main__":
     pytest.main([__file__])
diff --git a/test/test_functions.py b/test/test_functions.py
diff --git a/torchdrift/data/functional/corruption_functions.py b/torchdrift/data/functional/corruption_functions.py
@@ -26,6 +26,7 @@
 from torch import Tensor
 import torch
 import math
+import torchdrift.utils
 
 __all__ = []
 
@@ -49,7 +50,9 @@ def _export(fn):
 
 
 def interpolate_severity(img, cifar, imagenet, severity):
-    assert severity >= 1 and severity <= 5
+    torchdrift.utils.check(
+        severity >= 1 and severity <= 5, "severity needs to be between 1 and 5"
+    )
     length = (img.size(-1) * img.size(-2)) ** 0.5
     alpha = max(min((length - 32) / (224 - 32), 1), 0)
     res = (1 - alpha) * cifar[severity - 1] + alpha * imagenet[severity - 1]
diff --git a/torchdrift/detectors/detector.py b/torchdrift/detectors/detector.py
@@ -1,4 +1,5 @@
 import torch
+import torchdrift.utils
 
 
 class Detector(torch.nn.Module):
@@ -35,7 +36,9 @@ def compute_p_value(self, inputs: torch.Tensor) -> torch.Tensor:
         """Performs a statistical test for drift and returns the p-value.
 
         This method calls `predict_shift_from_features` under the hood, so you only need to override that when subclassing."""
-        assert self.base_outputs is not None, "Please call fit before compute_p_value"
+        torchdrift.utils.check(
+            self.base_outputs is not None, "Please call fit before compute_p_value"
+        )
         _, p_value = self.predict_shift_from_features(
             self.base_outputs, inputs, compute_score=False, compute_p_value=True
         )
@@ -47,7 +50,9 @@ def forward(
         """Performs a statistical test for drift and returns the score or, if `return_p_value` has been set in the constructor, the p-value.
 
         This method calls `predict_shift_from_features` under the hood, so you only need to override that when subclassing."""
-        assert self.base_outputs is not None, "Please call fit before predict_shift"
+        torchdrift.utils.check(
+            self.base_outputs is not None, "Please call fit before predict_shift"
+        )
         ood_score, p_value = self.predict_shift_from_features(
             self.base_outputs,
             inputs,
diff --git a/torchdrift/detectors/ks.py b/torchdrift/detectors/ks.py
@@ -4,6 +4,7 @@
 import numpy
 
 from . import Detector
+import torchdrift.utils
 
 try:
     import numba
@@ -60,7 +61,7 @@ def ks_two_sample_multi_dim(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
     """
     n_x, n_features = x.shape
     n_y, n_features_y = y.shape
-    assert n_features == n_features_y
+    torchdrift.utils.check(n_features == n_features_y, "feature dimension mismatch")
 
     joint_sorted = torch.argsort(torch.cat([x, y], dim=0), dim=0)
     sign = (joint_sorted < n_x).to(dtype=torch.float) * (1 / (n_x) + 1 / (n_y)) - (
@@ -92,9 +93,9 @@ def predict_shift_from_features(
         compute_p_value: bool,
         individual_samples: bool = False,
     ):
-        assert (
-            not individual_samples
-        ), "Individual samples not supported by MMD detector"
+        torchdrift.utils.check(
+            not individual_samples, "Individual samples not supported by MMD detector"
+        )
         ood_score = ks_two_sample_multi_dim(outputs, self.base_outputs)
         # Like failing loudly suggests to return the minimum p-value under the
         # label Bonferroni correction, this would correspond to the maximum score
diff --git a/torchdrift/detectors/mmd.py b/torchdrift/detectors/mmd.py
@@ -3,24 +3,26 @@
 import torch
 
 from . import Detector
+import torchdrift.utils
 
 
 class Kernel:
     """Base class for kernels
 
-Unless otherwise noted, all kernels implementing lengthscale detection
-use the median of pairwise distances as the lengthscale."""
+    Unless otherwise noted, all kernels implementing lengthscale detection
+    use the median of pairwise distances as the lengthscale."""
+
     pass
 
 
 class GaussianKernel(Kernel):
     r"""Unnormalized gaussian kernel
 
-.. math::
-    k(|x-y|) = \exp(-|x-y|^2/(2\ell^2))
+    .. math::
+        k(|x-y|) = \exp(-|x-y|^2/(2\ell^2))
+
+    where :math:`\ell` is the `lengthscale` (autodetected or given)."""
 
-where :math:`\ell` is the `lengthscale` (autodetected or given).
-"""
     def __init__(self, lengthscale=None):
         super().__init__()
         self.lengthscale = lengthscale
@@ -37,11 +39,10 @@ def __call__(self, dists):
 class ExpKernel(Kernel):
     r"""Unnormalized exponential kernel
 
-.. math::
-    k(|x-y|) = \exp(-|x-y|/\ell)
+    .. math::
+        k(|x-y|) = \exp(-|x-y|/\ell)
 
-where :math:`\ell` is the `lengthscale` (autodetected or given).
-"""
+    where :math:`\ell` is the `lengthscale` (autodetected or given)."""
 
     def __init__(self, lengthscale=None):
         super().__init__()
@@ -58,11 +59,10 @@ def __call__(self, dists):
 class RationalQuadraticKernel(Kernel):
     r"""Unnormalized rational quadratic kernel
 
-.. math::
-    k(|x-y|) = (1+|x-y|^2/(2 \alpha \ell^2))^{-\alpha}
+    .. math::
+        k(|x-y|) = (1+|x-y|^2/(2 \alpha \ell^2))^{-\alpha}
 
-where :math:`\ell` is the `lengthscale` (autodetected or given).
-"""
+    where :math:`\ell` is the `lengthscale` (autodetected or given)."""
 
     def __init__(self, lengthscale=None, alpha=1.0):
         super().__init__()
@@ -95,7 +95,7 @@ def kernel_mmd(x, y, n_perm=1000, kernel=GaussianKernel()):
 
     n, d = x.shape
     m, d2 = y.shape
-    assert d == d2
+    torchdrift.utils.check(d == d2, "feature dimension mismatch")
     xy = torch.cat([x.detach(), y.detach()], dim=0)
     dists = torch.cdist(xy, xy, p=2.0)
     # we are a bit sloppy here as we just keep the diagonal and everything twice
@@ -161,9 +161,9 @@ def predict_shift_from_features(
         compute_p_value: bool,
         individual_samples: bool = False,
     ):
-        assert (
-            not individual_samples
-        ), "Individual samples not supported by MMD detector"
+        torchdrift.utils.check(
+            not individual_samples, "Individual samples not supported by MMD detector"
+        )
         if not compute_p_value:
             ood_score = kernel_mmd(
                 outputs, base_outputs, n_perm=None, kernel=self.kernel
diff --git a/torchdrift/reducers/pca.py b/torchdrift/reducers/pca.py
@@ -1,5 +1,6 @@
 import torch
 from . import Reducer
+import torchdrift.utils
 
 
 class PCAReducer(Reducer):
@@ -21,7 +22,10 @@ def extra_repr(self) -> str:
 
     def fit(self, x: torch.Tensor) -> torch.Tensor:
         batch, feat = x.shape
-        assert min(batch, feat) >= self.n_components
+        torchdrift.utils.check(
+            min(batch, feat) >= self.n_components,
+            "need number of samples and size of feature to be at least the number of components",
+        )
         self.mean = x.mean(0, keepdim=True)
         x = x - self.mean
         u, s, v = x.svd()
diff --git a/torchdrift/utils/__init__.py b/torchdrift/utils/__init__.py
@@ -1,2 +1,8 @@
 from .experiments import DriftDetectionExperiment
 from .fit import fit
+
+
+def check(check, message):
+    """Raise `RuntimeError` with `message` if `check` is falsy; otherwise do nothing."""
+    if not check:
+        raise RuntimeError(message)
diff --git a/torchdrift/utils/experiments.py b/torchdrift/utils/experiments.py
@@ -1,6 +1,7 @@
 import torch
 import tqdm
 from .fit import fit
+import torchdrift.utils
 
 
 class DriftDetectionExperiment:
@@ -46,7 +47,10 @@ def evaluate(self, ind_datamodule, ood_datamodule, num_runs=50):
         ood_dl = ood_datamodule.default_dataloader(
             batch_size=num_ood, num_samples=num_ood * num_runs
         )
-        assert num_ood > 0 and num_ind >= 0
+        torchdrift.utils.check(
+            num_ood > 0 and num_ind >= 0,
+            "need at least one out of distribution sample and cannot have more than the sample size",
+        )
         all_drifted_scores = []
         all_ind_scores = []
         for r, (ind_batch, ood_batch) in tqdm.tqdm(