{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro",
   "metadata": {},
   "source": [
    "# Offline Algorithms Quickstart\n",
    "\n",
    "This notebook compares the core offline changepoint detectors on a seeded, reproducible synthetic business KPI signal.\n",
    "\n",
    "**Goal:** run the high-level APIs, run low-level detector and pipeline paths, and inspect breakpoints quickly."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "setup",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "`matplotlib` is optional: without it the plotting cell prints a notice and is skipped."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import cpd\n",
    "\n",
    "# matplotlib is only needed by the final plotting cell; record whether it is available.\n",
    "try:\n",
    "    import matplotlib.pyplot as plt\n",
    "    HAS_MATPLOTLIB = True\n",
    "except ImportError:\n",
    "    HAS_MATPLOTLIB = False\n",
    "    print(\"matplotlib is optional. Install with: python -m pip install matplotlib\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "signal-intro",
   "metadata": {},
   "source": [
    "## Synthetic signal\n",
    "\n",
    "Three constant-level segments plus Gaussian noise drawn from a fixed seed. The ground-truth breakpoint list holds the cumulative segment ends, so its last entry equals `len(x)`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "signal",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seeded synthetic business KPI with two regime shifts and moderate noise\n",
    "rng = np.random.default_rng(2026)\n",
    "segment_lengths = [100, 120, 90]\n",
    "segment_levels = [120.0, 155.0, 132.0]\n",
    "\n",
    "# Repeat each level for its segment length (works for any number of segments).\n",
    "base = np.repeat(np.asarray(segment_levels, dtype=np.float64), segment_lengths)\n",
    "x = base + rng.normal(0.0, 3.0, size=base.shape[0])\n",
    "\n",
    "# Cumulative segment ends; the final entry is the end of the signal.\n",
    "ground_truth_breakpoints = np.cumsum(segment_lengths).tolist()\n",
    "assert ground_truth_breakpoints[-1] == len(x)\n",
    "ground_truth_breakpoints\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "high-level-intro",
   "metadata": {},
   "source": [
    "## High-level detector classes\n",
    "\n",
    "Each detector is fit on `x` and asked for two breakpoints; results are collected in `results` keyed by algorithm name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "high-level",
   "metadata": {},
   "outputs": [],
   "source": [
    "# High-level offline APIs\n",
    "results = {}\n",
    "results[\"pelt\"] = cpd.Pelt(model=\"l2\", min_segment_len=10).fit(x).predict(n_bkps=2)\n",
    "results[\"binseg\"] = cpd.Binseg(model=\"l2\", min_segment_len=10).fit(x).predict(n_bkps=2)\n",
    "results[\"fpop\"] = cpd.Fpop(min_segment_len=10).fit(x).predict(n_bkps=2)\n",
    "{name: result.breakpoints for name, result in results.items()}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "low-level-intro",
   "metadata": {},
   "source": [
    "## Low-level `detect_offline` paths\n",
    "\n",
    "`segneigh` is configured through keyword arguments, while `wbs` is configured through a single pipeline dictionary. Both add their result to the same `results` mapping."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "low-level",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Low-level detect_offline paths: segneigh and pipeline-form wbs\n",
    "results[\"segneigh\"] = cpd.detect_offline(\n",
    "    x,\n",
    "    detector=\"segneigh\",\n",
    "    cost=\"l2\",\n",
    "    constraints={\"min_segment_len\": 10},\n",
    "    stopping={\"n_bkps\": 2},\n",
    "    repro_mode=\"balanced\",\n",
    ")\n",
    "\n",
    "wbs_pipeline = {\n",
    "    \"detector\": {\"kind\": \"wbs\", \"seed\": 7},\n",
    "    \"cost\": \"l2\",\n",
    "    \"constraints\": {\"min_segment_len\": 10},\n",
    "    \"stopping\": {\"n_bkps\": 2},\n",
    "}\n",
    "results[\"wbs\"] = cpd.detect_offline(x, pipeline=wbs_pipeline, repro_mode=\"balanced\")\n",
    "{name: result.breakpoints for name, result in results.items()}\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "comparison-intro",
   "metadata": {},
   "source": [
    "## Comparison table\n",
    "\n",
    "One line per algorithm: predicted breakpoints plus the detector and cost model reported in the result diagnostics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "comparison",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compact comparison table\n",
    "rows = []\n",
    "for name, result in results.items():\n",
    "    rows.append({\n",
    "        \"algorithm\": name,\n",
    "        \"breakpoints\": result.breakpoints,\n",
    "        \"detector\": result.diagnostics.algorithm,\n",
    "        \"cost\": result.diagnostics.cost_model,\n",
    "    })\n",
    "for row in rows:\n",
    "    print(f\"{row['algorithm']:<9} breakpoints={row['breakpoints']} detector={row['detector']} cost={row['cost']}\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "plot-intro",
   "metadata": {},
   "source": [
    "## Plot\n",
    "\n",
    "Green dashed lines mark the ground-truth changes; coloured markers show each detector's predicted breakpoints (see the legend). Detectors that report the same breakpoint draw on top of each other."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "plot",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot signal and predicted breakpoints\n",
    "if HAS_MATPLOTLIB:\n",
    "    fig, ax = plt.subplots(figsize=(12, 4))\n",
    "    ax.plot(x, color=\"black\", linewidth=1.2, alpha=0.8, label=\"kpi\")\n",
    "    # The last ground-truth entry is len(x) (end of signal), so it is not drawn.\n",
    "    for cp in ground_truth_breakpoints[:-1]:\n",
    "        ax.axvline(cp, color=\"tab:green\", linestyle=\"--\", linewidth=1.5, alpha=0.8)\n",
    "    colors = {\"pelt\": \"tab:blue\", \"binseg\": \"tab:orange\", \"fpop\": \"tab:purple\", \"segneigh\": \"tab:red\", \"wbs\": \"tab:brown\"}\n",
    "    for name, result in results.items():\n",
    "        # Keep only in-range breakpoints: x[len(x)] would be out of bounds.\n",
    "        cps = np.asarray([bp for bp in result.breakpoints if bp < len(x)], dtype=np.intp)\n",
    "        # One labelled scatter per detector so the legend identifies each colour.\n",
    "        ax.scatter(cps, x[cps], color=colors.get(name, \"tab:gray\"), s=35, alpha=0.9, label=name)\n",
    "    ax.set_title(\"Offline detector comparison on noisy KPI signal\")\n",
    "    ax.set_xlabel(\"sample\")\n",
    "    ax.set_ylabel(\"kpi value\")\n",
    "    ax.text(0.01, 0.96, \"green dashed = ground truth\", transform=ax.transAxes, va=\"top\", fontsize=10)\n",
    "    # Legend sits outside the axes so it never hides the signal.\n",
    "    ax.legend(loc=\"upper left\", bbox_to_anchor=(1.01, 1.0), borderaxespad=0.0)\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "else:\n",
    "    print(\"Skipping plot (matplotlib not installed).\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "method-guide",
   "metadata": {},
   "source": [
    "## When to use which offline method\n",
    "\n",
    "- `Pelt`: default when you want strong performance and automatic model selection behavior.\n",
    "- `Binseg`: fast approximate segmentation for quick iteration.\n",
    "- `Fpop`: efficient exact-style pruning path for `l2` cost.\n",
    "- `SegNeigh`: exact fixed-`k` segmentation when `n_bkps` is known.\n",
    "- `WBS`: useful when masking or closely spaced changes are expected."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}