Skip to content

Detectors API

driftwatch.detectors.base.BaseDetector

BaseDetector(threshold: float, name: str)

Bases: ABC

Abstract base class for drift detectors.

All drift detection methods should inherit from this class and implement the detect method.

Parameters:

Name Type Description Default
threshold float

Threshold value for determining drift

required
name str

Human-readable name for the detector

required
Source code in src/driftwatch/detectors/base.py
def __init__(self, threshold: float, name: str) -> None:
    """
    Store the settings shared by every detector.

    Args:
        threshold: Threshold value for determining drift
        name: Human-readable name for the detector
    """
    self.name = name
    self.threshold = threshold

detect abstractmethod

detect(reference: Series, production: Series) -> DetectionResult

Detect drift between reference and production data.

Parameters:

Name Type Description Default
reference Series

Reference data series

required
production Series

Production data series

required

Returns:

Type Description
DetectionResult

DetectionResult with drift status and metrics

Source code in src/driftwatch/detectors/base.py
@abstractmethod
def detect(self, reference: pd.Series, production: pd.Series) -> DetectionResult:
    """
    Compare production data against reference data and report drift.

    This is the abstract hook that each concrete detector overrides.

    Args:
        reference: Reference data series
        production: Production data series

    Returns:
        DetectionResult with drift status and metrics
    """

driftwatch.detectors.numerical.PSIDetector

PSIDetector(threshold: float = 0.2, buckets: int = 10)

Bases: BaseDetector

Population Stability Index (PSI) for numerical drift detection.

PSI measures the shift in distribution between two populations. Commonly used thresholds:

- PSI < 0.1: No significant change
- 0.1 <= PSI < 0.2: Minor shift
- PSI >= 0.2: Significant shift (drift)

Parameters:

Name Type Description Default
threshold float

PSI value at or above which drift is detected. Default is 0.2.

0.2
buckets int

Number of buckets for binning. Default is 10.

10
Example

detector = PSIDetector(threshold=0.2, buckets=10)
result = detector.detect(reference_series, production_series)

Source code in src/driftwatch/detectors/numerical.py
def __init__(self, threshold: float = 0.2, buckets: int = 10) -> None:
    """
    Configure a PSI detector registered under the name "psi".

    Args:
        threshold: PSI threshold forwarded to the base detector
        buckets: Number of buckets for binning
    """
    self.buckets = buckets
    super().__init__(name="psi", threshold=threshold)

detect

detect(reference: Series, production: Series) -> DetectionResult

Calculate PSI between reference and production distributions.

Returns:

Type Description
DetectionResult

DetectionResult with PSI score

Source code in src/driftwatch/detectors/numerical.py
def detect(
    self,
    reference: pd.Series,
    production: pd.Series,
) -> DetectionResult:
    """
    Calculate PSI between reference and production distributions.

    Missing values are dropped from both series before the PSI is computed.

    Args:
        reference: Reference data series
        production: Production data series

    Returns:
        DetectionResult with PSI score. Drift is reported when the PSI is
        greater than or equal to the threshold; no p-value is produced.
    """
    self._validate_inputs(reference, production)

    psi_value = self._calculate_psi(
        np.asarray(reference.dropna().values),
        np.asarray(production.dropna().values),
    )

    return DetectionResult(
        # The comparison yields numpy.bool_ when psi_value is a NumPy scalar;
        # coerce it so has_drift is a plain bool, just as score is a plain float.
        has_drift=bool(psi_value >= self.threshold),
        score=float(psi_value),
        method=self.name,
        threshold=self.threshold,
        p_value=None,
    )

driftwatch.detectors.numerical.KSDetector

KSDetector(threshold: float = 0.05)

Bases: BaseDetector

Kolmogorov-Smirnov test for numerical drift detection.

The KS test measures the maximum distance between the cumulative distribution functions of two samples.

Parameters:

Name Type Description Default
threshold float

P-value threshold below which drift is detected. Default is 0.05 (95% confidence).

0.05
Example

detector = KSDetector(threshold=0.05)
result = detector.detect(reference_series, production_series)
print(f"Drift detected: {result.has_drift}")

Source code in src/driftwatch/detectors/numerical.py
def __init__(self, threshold: float = 0.05) -> None:
    """
    Configure a KS detector registered under the name "ks_test".

    Args:
        threshold: P-value threshold forwarded to the base detector
    """
    super().__init__(
        name="ks_test",
        threshold=threshold,
    )

detect

detect(reference: Series, production: Series) -> DetectionResult

Perform KS test between reference and production distributions.

Returns:

Type Description
DetectionResult

DetectionResult with KS statistic as score and p-value

Source code in src/driftwatch/detectors/numerical.py
def detect(
    self,
    reference: pd.Series,
    production: pd.Series,
) -> DetectionResult:
    """
    Perform KS test between reference and production distributions.

    Missing values are dropped from both series before the test is run.

    Args:
        reference: Reference data series
        production: Production data series

    Returns:
        DetectionResult with KS statistic as score and p-value. Drift is
        reported when the p-value is strictly below the threshold.
    """
    self._validate_inputs(reference, production)

    statistic, p_value = stats.ks_2samp(
        reference.dropna(),
        production.dropna(),
    )

    return DetectionResult(
        # Comparing a NumPy scalar yields numpy.bool_; coerce it so has_drift
        # is a plain bool, just as score and p_value are plain floats.
        has_drift=bool(p_value < self.threshold),
        score=float(statistic),
        method=self.name,
        threshold=self.threshold,
        p_value=float(p_value),
    )

driftwatch.detectors.numerical.WassersteinDetector

WassersteinDetector(threshold: float = 0.1)

Bases: BaseDetector

Wasserstein distance (Earth Mover's Distance) for drift detection.

Measures the minimum "work" required to transform one distribution into another. More sensitive to subtle distributional changes.

Parameters:

Name Type Description Default
threshold float

Normalized distance (Wasserstein distance divided by the reference standard deviation) at or above which drift is detected.

0.1
Source code in src/driftwatch/detectors/numerical.py
def __init__(self, threshold: float = 0.1) -> None:
    """
    Configure a Wasserstein detector registered under the name "wasserstein".

    Args:
        threshold: Distance threshold forwarded to the base detector
    """
    super().__init__(
        name="wasserstein",
        threshold=threshold,
    )

detect

detect(reference: Series, production: Series) -> DetectionResult

Calculate Wasserstein distance between distributions.

Note: Values are normalized by the reference standard deviation to make the threshold more interpretable.

Source code in src/driftwatch/detectors/numerical.py
def detect(
    self,
    reference: pd.Series,
    production: pd.Series,
) -> DetectionResult:
    """
    Calculate Wasserstein distance between distributions.

    Note: Values are normalized by the reference standard deviation
    to make the threshold more interpretable. When that standard
    deviation is not positive, the raw distance is used instead.

    Args:
        reference: Reference data series
        production: Production data series

    Returns:
        DetectionResult with the normalized distance as score. Drift is
        reported when the score is greater than or equal to the threshold;
        no p-value is produced.
    """
    self._validate_inputs(reference, production)

    # Missing values are excluded from both samples
    ref_clean = reference.dropna().values
    prod_clean = production.dropna().values

    distance = stats.wasserstein_distance(ref_clean, prod_clean)

    # Normalize by reference std for interpretability; fall back to the raw
    # distance rather than divide by a zero spread
    ref_std = np.std(ref_clean)
    normalized_distance = distance / ref_std if ref_std > 0 else distance

    return DetectionResult(
        # Comparing a NumPy scalar yields numpy.bool_; coerce it so has_drift
        # is a plain bool, just as score is a plain float.
        has_drift=bool(normalized_distance >= self.threshold),
        score=float(normalized_distance),
        method=self.name,
        threshold=self.threshold,
        p_value=None,
    )

driftwatch.detectors.categorical.ChiSquaredDetector

ChiSquaredDetector(threshold: float = 0.05)

Bases: BaseDetector

Chi-Squared test for categorical drift detection.

Tests whether the frequency distribution of categories has changed between reference and production data.

Parameters:

Name Type Description Default
threshold float

P-value threshold below which drift is detected. Default is 0.05 (95% confidence).

0.05
Example

detector = ChiSquaredDetector(threshold=0.05)
result = detector.detect(reference_series, production_series)

Source code in src/driftwatch/detectors/categorical.py
def __init__(self, threshold: float = 0.05) -> None:
    """
    Configure a Chi-Squared detector registered under the name "chi_squared".

    Args:
        threshold: P-value threshold forwarded to the base detector
    """
    super().__init__(
        name="chi_squared",
        threshold=threshold,
    )

detect

detect(reference: Series, production: Series) -> DetectionResult

Perform Chi-Squared test on category frequencies.

Returns:

Type Description
DetectionResult

DetectionResult with chi-squared statistic and p-value

Source code in src/driftwatch/detectors/categorical.py
def detect(
    self,
    reference: pd.Series,
    production: pd.Series,
) -> DetectionResult:
    """
    Perform Chi-Squared test on category frequencies.

    Production counts are compared against the counts expected if production
    followed the reference category proportions. Missing values are ignored.

    Args:
        reference: Reference data series
        production: Production data series

    Returns:
        DetectionResult with chi-squared statistic and p-value. Drift is
        reported when the p-value is strictly below the threshold. If either
        series has no non-missing values, drift is reported with an infinite
        score and a p-value of 0.0.
    """
    self._validate_inputs(reference, production)

    # Get all categories from both datasets
    all_categories = set(reference.dropna().unique()) | set(
        production.dropna().unique()
    )

    # Count frequencies
    ref_counts = reference.value_counts()
    prod_counts = production.value_counts()

    # Align to same categories; a category missing on one side counts as 0
    ref_freq = np.array([ref_counts.get(cat, 0) for cat in all_categories])
    prod_freq = np.array([prod_counts.get(cat, 0) for cat in all_categories])

    # Handle edge case of zero frequencies
    if ref_freq.sum() == 0 or prod_freq.sum() == 0:
        return DetectionResult(
            has_drift=True,
            score=float("inf"),
            method=self.name,
            threshold=self.threshold,
            p_value=0.0,
        )

    # Calculate expected frequencies based on reference proportions
    ref_proportions = ref_freq / ref_freq.sum()
    expected = ref_proportions * prod_freq.sum()

    # Add small epsilon to avoid division by zero for categories that
    # appear only in production
    expected = np.maximum(expected, 1e-10)

    # Chi-squared statistic
    statistic, p_value = stats.chisquare(prod_freq, f_exp=expected)

    return DetectionResult(
        # Comparing a NumPy scalar yields numpy.bool_; coerce it so has_drift
        # is a plain bool here, as it already is on the early-return path.
        has_drift=bool(p_value < self.threshold),
        score=float(statistic),
        method=self.name,
        threshold=self.threshold,
        p_value=float(p_value),
    )