Offline Algorithms Quickstart

This notebook compares the core offline changepoint detectors on a reproducible (fixed-seed) synthetic business KPI signal.

Goal: run the high-level APIs, run low-level detector and pipeline paths, and inspect breakpoints quickly.

import numpy as np
import cpd

# matplotlib is only needed for the final plot; everything else runs without it.
# Start from the optimistic flag and clear it if the import fails.
HAS_MATPLOTLIB = True
try:
    import matplotlib.pyplot as plt
except ImportError:
    HAS_MATPLOTLIB = False
    print("matplotlib is optional. Install with: python -m pip install matplotlib")
# Reproducible business KPI: piecewise-constant levels (two regime shifts)
# plus Gaussian noise drawn from a fixed-seed generator.
rng = np.random.default_rng(2026)
segment_lengths = [100, 120, 90]
segment_levels = [120.0, 155.0, 132.0]

# Repeat each level for the length of its segment to get the noise-free signal.
base = np.repeat(np.asarray(segment_levels, dtype=np.float64), segment_lengths)
x = base + rng.normal(loc=0.0, scale=3.0, size=base.size)

# Cumulative segment ends; the last entry equals len(x), i.e. the end of the signal.
ground_truth_breakpoints = np.cumsum(segment_lengths).tolist()
ground_truth_breakpoints
# High-level offline APIs: construct each estimator, fit it on x, and call
# predict(n_bkps=2). Entries are keyed by a short algorithm label.
results = {
    "pelt": cpd.Pelt(model="l2", min_segment_len=10).fit(x).predict(n_bkps=2),
    "binseg": cpd.Binseg(model="l2", min_segment_len=10).fit(x).predict(n_bkps=2),
    "fpop": cpd.Fpop(min_segment_len=10).fit(x).predict(n_bkps=2),
}
{label: outcome.breakpoints for label, outcome in results.items()}
# Low-level detect_offline paths: segneigh and pipeline-form wbs

# Keyword form: detector, cost, constraints and stopping are passed as
# separate arguments to detect_offline.
segneigh_options = {
    "detector": "segneigh",
    "cost": "l2",
    "constraints": {"min_segment_len": 10},
    "stopping": {"n_bkps": 2},
    "repro_mode": "balanced",
}
results["segneigh"] = cpd.detect_offline(x, **segneigh_options)

# Pipeline form: the same kinds of settings bundled into one dict; the
# detector entry is itself a dict carrying the wbs seed.
wbs_pipeline = {
    "detector": {"kind": "wbs", "seed": 7},
    "cost": "l2",
    "constraints": {"min_segment_len": 10},
    "stopping": {"n_bkps": 2},
}
results["wbs"] = cpd.detect_offline(
    x,
    pipeline=wbs_pipeline,
    repro_mode="balanced",
)
{label: outcome.breakpoints for label, outcome in results.items()}
# Compact comparison table: one record per algorithm, pairing the label used
# in `results` with the breakpoints and the detector/cost names reported in
# the result's diagnostics.
rows = [
    {
        "algorithm": label,
        "breakpoints": outcome.breakpoints,
        "detector": outcome.diagnostics.algorithm,
        "cost": outcome.diagnostics.cost_model,
    }
    for label, outcome in results.items()
]
# Left-align the algorithm label in a 9-character column so the rows line up.
for row in rows:
    print(f"{row['algorithm']:<9} breakpoints={row['breakpoints']} detector={row['detector']} cost={row['cost']}")
# Plot signal and predicted breakpoints
if HAS_MATPLOTLIB:
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(x, color="black", linewidth=1.2, alpha=0.8, label="kpi")
    # The last ground-truth entry is len(x) (end of signal), so it is not drawn.
    for cp in ground_truth_breakpoints[:-1]:
        ax.axvline(cp, color="tab:green", linestyle="--", linewidth=1.5, alpha=0.8)
    colors = {"pelt": "tab:blue", "binseg": "tab:orange", "fpop": "tab:purple", "segneigh": "tab:red", "wbs": "tab:brown"}
    for name, result in results.items():
        # Keep only breakpoints that index into x; x[bp] is out of range for bp == len(x).
        interior = [bp for bp in result.breakpoints if bp < len(x)]
        # One labelled scatter call per detector so each gets exactly one legend entry.
        # Markers from detectors that agree are drawn on top of each other.
        ax.scatter(
            interior,
            [x[bp] for bp in interior],
            color=colors[name],
            s=35,
            alpha=0.9,
            label=name,
        )
    ax.set_title("Offline detector comparison on noisy KPI signal")
    ax.set_xlabel("sample")
    ax.set_ylabel("kpi value")
    ax.text(0.01, 0.96, "green dashed = ground truth", transform=ax.transAxes, va="top", fontsize=10)
    # Without a legend the detector colours (and the "kpi" label) are undecodable.
    # Upper right keeps it clear of the ground-truth note in the upper left.
    ax.legend(loc="upper right", ncol=2, fontsize=9)
    plt.show()
else:
    print("Skipping plot (matplotlib not installed).")

When to use which offline method

  • Pelt: the default choice when you want strong performance and the number of changepoints selected automatically.

  • Binseg: fast approximate segmentation for quick iteration.

  • Fpop: efficient exact segmentation via functional pruning, for the l2 cost.

  • SegNeigh: exact fixed-k segmentation when n_bkps is known.

  • WBS: useful when masking or closely spaced changes are expected.