"""
Abstract base classes for ``spey`` statistical-model backends.
This module defines two abstract base classes:
* :class:`~spey.BackendBase` — the interface every statistical-model backend must
implement to integrate with ``spey``'s hypothesis-testing machinery.
* :class:`~spey.base.backend_base.ConverterBase` — a lightweight interface for
objects that convert one statistical model representation into a
:class:`~spey.BackendBase` instance.
For a step-by-step guide on writing, registering, and packaging a new backend see
the :ref:`sec_new_plugin` tutorial in the documentation.
"""
from abc import ABC, abstractmethod
from typing import Callable, List, Optional, Tuple, Union
import numpy as np
from spey.base.model_config import ModelConfig
from spey.utils import ExpectationType
__all__ = ["BackendBase"]
def __dir__():
    """Return the module's public names, i.e. the contents of ``__all__``.

    Defining a module-level ``__dir__`` makes ``dir()`` on this module list only
    the names declared in ``__all__``.
    """
    public_names = __all__
    return public_names
class BackendBase(ABC):
    r"""
    Abstract base class that every ``spey`` statistical-model backend must inherit.

    ``spey`` relies on a plugin system to support multiple likelihood prescriptions.
    Any new prescription is expressed as a Python class that inherits
    :class:`~spey.BackendBase` and implements, at minimum,
    :func:`~spey.BackendBase.config` and :func:`~spey.BackendBase.get_logpdf_func`.
    The framework then automatically enables hypothesis testing, upper-limit
    computation, and Asimov-data generation for the new prescription through
    :class:`~spey.StatisticalModel`.

    **Required class-level metadata**

    Each backend class must expose the following attributes so that ``spey``'s plugin
    registry can identify, version-check, and cite it:

    .. code-block:: python

        class MyBackend(spey.BackendBase):
            name = "my_package.my_model"   # unique entry-point name
            version = "1.0.0"              # backend version string
            author = "Jane Doe <jane@example.com>"
            spey_requires = ">=0.1.0"      # minimum compatible spey version
            doi = []                       # optional list of citable DOIs
            arXiv = []                     # optional list of arXiv IDs

    **Required methods**

    Subclasses *must* implement:

    * :func:`~spey.BackendBase.config` — returns a
      :class:`~spey.base.model_config.ModelConfig` describing the parameter
      structure (number of parameters, POI index, suggested initial values,
      and parameter bounds).
    * :func:`~spey.BackendBase.get_logpdf_func` — returns a callable
      ``f(pars: np.ndarray) -> float`` that evaluates
      :math:`\log\mathcal{L}(\mu, \theta)` for a given parameter vector.

    **Optional methods**

    Each optional method unlocks additional capabilities in the ``spey`` interface:

    .. list-table::
        :header-rows: 1
        :widths: 40 60

        * - Method
          - Capability unlocked
        * - :func:`~spey.BackendBase.is_alive`
          - Quick validity check; defaults to ``True``.
        * - :func:`~spey.BackendBase.expected_data`
          - Required for the **asymptotic** calculator and Asimov-data generation.
        * - :func:`~spey.BackendBase.get_objective_function`
          - Override to supply analytical gradients for the optimiser.
        * - :func:`~spey.BackendBase.get_hessian_logpdf_func`
          - Enables :func:`~spey.StatisticalModel.sigma_mu_from_hessian`.
        * - :func:`~spey.BackendBase.get_sampler`
          - Required for the **toy** (pseudo-experiment) calculator.
        * - :func:`~spey.BackendBase.combine`
          - Enables model combination via :func:`~spey.StatisticalModel.combine`
            and the ``@`` operator.
        * - :func:`~spey.BackendBase.negative_loglikelihood` and variants
          - Optional fast-path overrides that bypass the generic ``spey`` optimiser.

    **Minimal working example**

    The example below implements a simple Poisson counting model,
    :math:`\mathcal{L}(\mu) = \prod_i \mathrm{Poiss}(n^i \mid \mu s^i + b^i)`,
    and registers it directly without a ``setup.py``:

    .. code-block:: python

        import numpy as np
        import spey
        from spey.base.model_config import ModelConfig

        @spey.register_backend
        class PoissonModel(spey.BackendBase):
            name = "my_package.poisson"
            version = "1.0.0"
            author = "Jane Doe"
            spey_requires = ">=0.1.0"

            def __init__(self, signal, background, data):
                self._signal = np.array(signal, dtype=float)
                self._background = np.array(background, dtype=float)
                self._data = np.array(data, dtype=float)

            @property
            def is_alive(self):
                return bool(np.any(self._signal > 0.0))

            def config(self, allow_negative_signal=True, poi_upper_bound=10.0):
                minimum_poi = -10.0 if allow_negative_signal else 0.0
                return ModelConfig(
                    poi_index=0,
                    minimum_poi=minimum_poi,
                    suggested_init=[1.0],
                    suggested_bounds=[(minimum_poi, poi_upper_bound)],
                )

            def get_logpdf_func(
                self, expected=spey.ExpectationType.observed, data=None
            ):
                if data is not None:
                    # Explicit data (e.g. an Asimov dataset) always takes
                    # precedence over ``expected``.
                    obs = np.array(data, dtype=float)
                elif expected is spey.ExpectationType.apriori:
                    obs = self._background
                else:
                    obs = self._data

                def logpdf(pars):
                    mu = pars[0]
                    rate = mu * self._signal + self._background
                    return float(np.sum(obs * np.log(rate) - rate))

                return logpdf

            def expected_data(self, pars):
                mu = pars[0]
                return list(mu * self._signal + self._background)

        # Use the model
        model = spey.get_backend("my_package.poisson")(
            signal=[5.0, 3.0],
            background=[10.0, 8.0],
            data=[12, 9],
            analysis="example",
            xsection=0.05,
        )
        print(model.exclusion_confidence_level())

    .. seealso::

        :ref:`sec_new_plugin` — full tutorial on writing, registering, and packaging
        a ``spey`` plugin, including entry-point installation via ``setup.py`` /
        ``pyproject.toml`` and citation metadata.
    """

    @property
    def is_alive(self) -> bool:
        """
        Whether the model has at least one bin with a non-zero signal yield.

        The default implementation always returns ``True``. Override this to
        short-circuit expensive calculations for signal hypotheses that are
        effectively empty.

        Returns:
            ``bool``:
            ``True`` if at least one signal bin is non-zero; ``False`` otherwise.
        """
        return True

    @abstractmethod
    def config(
        self, allow_negative_signal: bool = True, poi_upper_bound: float = 10.0
    ) -> ModelConfig:
        r"""
        Return the model configuration used by the optimiser.

        This **abstract** method must be implemented by every backend. It communicates
        the parameter structure of the model to the ``spey`` optimiser: how many
        parameters there are, which index belongs to the parameter of interest (POI)
        :math:`\mu`, what sensible initial values are, and what bounds to apply.

        Args:
            allow_negative_signal (``bool``, default ``True``): When ``True``, the lower
              bound of the POI is set to
              :attr:`~spey.base.model_config.ModelConfig.minimum_poi`; when ``False``
              the lower bound is forced to ``0.0`` so that :math:`\hat\mu \geq 0`.
            poi_upper_bound (``float``, default ``10.0``): Upper bound applied to the
              POI :math:`\mu` during optimisation.

        Returns:
            ~spey.base.model_config.ModelConfig:
            Configuration object containing the POI index, minimum POI value,
            suggested initialisation parameters, and suggested parameter bounds
            for the optimiser.
        """

    @abstractmethod
    def get_logpdf_func(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[Union[List[float], np.ndarray]] = None,
    ) -> Callable[[np.ndarray], float]:
        r"""
        Return a callable that evaluates :math:`\log\mathcal{L}(\mu, \theta)`.

        This **abstract** method must be implemented by every backend. The returned
        function is the primary input to ``spey``'s optimiser and hypothesis-testing
        machinery.

        The ``expected`` argument selects which dataset is used when ``data`` is
        ``None``:

        * :obj:`~spey.ExpectationType.observed` — use the observed experimental counts.
        * :obj:`~spey.ExpectationType.aposteriori` — use the observed data with
          post-fit nuisance treatment.
        * :obj:`~spey.ExpectationType.apriori` — use the background-only prediction
          (SM hypothesis), giving the *expected* (pre-fit) likelihood.

        When ``data`` is explicitly provided it always takes precedence over
        ``expected`` (this is used internally for Asimov-data computations).

        Args:
            expected (~spey.ExpectationType): Selects which dataset to use when
              ``data`` is ``None``.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            data (``Union[List[float], np.ndarray]``, default ``None``): Explicit
              dataset to condition on. When provided, overrides ``expected``.

        Returns:
            ``Callable[[np.ndarray], float]``:
            A function ``logpdf(pars) -> float`` where ``pars`` is a 1-D array of
            fit parameters :math:`(\mu, \theta_1, \theta_2, \ldots)` and the return
            value is :math:`\log\mathcal{L}(\mu, \theta)`.
        """

    def get_objective_function(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[Union[List[float], np.ndarray]] = None,
        do_grad: bool = True,
    ) -> Callable[[np.ndarray], Union[float, Tuple[float, np.ndarray]]]:
        r"""
        Return the objective function (and optionally its gradient) for the optimiser.

        The objective is the negative log-likelihood,
        :math:`-\log\mathcal{L}(\mu, \theta)`. When ``do_grad=True`` the returned
        callable should also return the gradient with respect to all parameters as a
        second element of a tuple, enabling gradient-based optimisers.

        The default implementation raises :obj:`NotImplementedError` for
        ``do_grad=True`` and falls back to negating the value of
        :func:`~spey.BackendBase.get_logpdf_func` for ``do_grad=False``. Override
        this method to provide analytical or auto-differentiation-based gradients,
        which can substantially improve optimisation speed and stability.

        Args:
            expected (~spey.ExpectationType): Selects which dataset to use when
              ``data`` is ``None``.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            data (``Union[List[float], np.ndarray]``, default ``None``): Explicit
              dataset to condition on. When provided, overrides ``expected``.
            do_grad (``bool``, default ``True``): If ``True``, return a callable that
              yields ``(objective, gradient)``; if ``False``, return a callable that
              yields only the scalar objective.

        Raises:
            :obj:`NotImplementedError`: When ``do_grad=True`` and the backend has not
              overridden this method.

        Returns:
            ``Callable[[np.ndarray], Union[float, Tuple[float, np.ndarray]]]``:
            A function ``objective(pars)`` that returns either a scalar
            :math:`-\log\mathcal{L}` (``do_grad=False``) or a tuple
            ``(-logL, gradient)`` (``do_grad=True``), where ``gradient`` is a 1-D
            array of the same length as ``pars``.
        """
        # The base class has no way to differentiate an arbitrary log-pdf.
        if do_grad:
            raise NotImplementedError("Gradient is not implemented by default.")

        # Build the log-pdf once; the closure below only flips its sign.
        logpdf = self.get_logpdf_func(expected=expected, data=data)

        def negative_logpdf(pars: np.ndarray) -> float:
            return -logpdf(pars)

        return negative_logpdf

    def get_hessian_logpdf_func(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[Union[List[float], np.ndarray]] = None,
    ) -> Callable[[np.ndarray], np.ndarray]:
        r"""
        Return a callable that evaluates the Hessian of :math:`\log\mathcal{L}(\mu, \theta)`.

        The Hessian is used by :func:`~spey.StatisticalModel.sigma_mu_from_hessian`
        to estimate the variance on the parameter of interest :math:`\mu` via the
        inverse of the observed information matrix (see eqs. 27–28 of
        :xref:`1007.1727`).

        The default implementation raises :obj:`NotImplementedError`. Override this
        method when an analytical or auto-differentiation Hessian is available, as it
        is considerably more accurate than a finite-difference approximation.

        Args:
            expected (~spey.ExpectationType): Selects which dataset to use when
              ``data`` is ``None``.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            data (``Union[List[float], np.ndarray]``, default ``None``): Explicit
              dataset to condition on. When provided, overrides ``expected``.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented the Hessian.

        Returns:
            ``Callable[[np.ndarray], np.ndarray]``:
            A function ``hessian(pars) -> np.ndarray`` where ``pars`` is a 1-D
            parameter array and the return value is the square Hessian matrix of
            :math:`\log\mathcal{L}` with shape ``(n_pars, n_pars)``.
        """
        raise NotImplementedError("This method has not been implemented")

    def get_sampler(self, pars: np.ndarray) -> Callable[[int], np.ndarray]:
        r"""
        Return a callable that draws pseudo-data from the model at fixed parameters.

        Implementing this method enables the **toy** (pseudo-experiment) calculator
        for hypothesis testing. The returned sampler is conditioned on the supplied
        fit parameters ``pars``; ``spey`` typically calls this after fitting the
        nuisance parameters for a given :math:`\mu`.

        The default implementation raises :obj:`NotImplementedError`.

        Args:
            pars (:obj:`np.ndarray`): 1-D array of fit parameters
              :math:`(\mu, \theta_1, \theta_2, \ldots)` at which to condition the
              sampler.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented a sampler.

        Returns:
            ``Callable[[int], np.ndarray]``:
            A function ``sampler(n) -> np.ndarray`` that draws ``n`` independent
            pseudo-datasets from the model, returned as an array of shape
            ``(n, n_bins)``.
        """
        raise NotImplementedError("This method has not been implemented")

    def expected_data(self, pars: List[float]) -> List[float]:
        r"""
        Return the expected bin counts for a given parameter vector.

        This method is used internally by
        :func:`~spey.StatisticalModel.generate_asimov_data` to produce Asimov
        datasets, and is therefore required for the **asymptotic** calculator. Without
        it, only the :math:`\chi^2` calculator is available.

        The default implementation raises :obj:`NotImplementedError`.

        Args:
            pars (``List[float]``): 1-D array or list of fit parameters
              :math:`(\mu, \theta_1, \theta_2, \ldots)`.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented this method.

        Returns:
            ``List[float]``:
            Expected bin counts :math:`\langle n^i \rangle = \mu s^i + b^i` (or the
            model-specific equivalent) evaluated at ``pars``.
        """
        raise NotImplementedError("This method has not been implemented")

    def combine(self, other: "BackendBase", **kwargs) -> "BackendBase":
        """
        Combine this statistical model with another backend instance.

        Implementing this method enables model combination via
        :func:`~spey.StatisticalModel.combine` and the ``@`` operator on
        :class:`~spey.StatisticalModel`. The returned object must itself be a
        :class:`~spey.BackendBase` instance so that ``spey`` can wrap it in a new
        :class:`~spey.StatisticalModel`.

        .. note::

            This method is optional and only needs to be implemented if the backend
            supports a specific combination routine (e.g. merging bin lists, combining
            workspaces, or constructing a joint likelihood).

        Args:
            other (:obj:`~spey.BackendBase`): The backend instance to combine with.
            kwargs: Backend-specific keyword arguments forwarded to the combination
              routine.

        Raises:
            :obj:`NotImplementedError`: If the backend does not implement combination.

        Returns:
            :obj:`~spey.BackendBase`:
            A new backend instance representing the combined statistical model.
        """
        raise NotImplementedError("This method does not have combination implementation.")

    def negative_loglikelihood(
        self,
        poi_test: float = 1.0,
        expected: ExpectationType = ExpectationType.observed,
        **kwargs,
    ) -> Tuple[float, np.ndarray]:
        r"""
        Compute the profiled negative log-likelihood at a fixed :math:`\mu`.

        This is an **optional fast-path override**. ``spey`` first tries to call this
        method; if it raises :obj:`NotImplementedError`, the interface falls back to
        minimising the objective function from
        :func:`~spey.BackendBase.get_objective_function` using the built-in optimiser.
        Implementing this method is only worthwhile when the backend has an efficient
        internal minimiser for the nuisance parameters.

        Args:
            poi_test (``float``, default ``1.0``): Fixed value of the parameter of
              interest :math:`\mu` at which to evaluate the profiled likelihood.
            expected (~spey.ExpectationType): Selects which dataset to condition on.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            kwargs: Additional keyword arguments forwarded to the backend's internal
              optimiser.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented this method
              (the ``spey`` interface will then use the generic optimiser).

        Returns:
            ``Tuple[float, np.ndarray]``:
            A tuple ``(nll, pars)`` where ``nll`` is the profiled negative
            log-likelihood :math:`-\log\mathcal{L}(\mu, \hat{\theta}_\mu)` and
            ``pars`` is the 1-D array of all fit parameters at the optimum.
        """
        raise NotImplementedError("This method has not been implemented")

    def asimov_negative_loglikelihood(
        self,
        poi_test: float = 1.0,
        expected: ExpectationType = ExpectationType.observed,
        test_statistics: str = "qtilde",
        **kwargs,
    ) -> Tuple[float, np.ndarray]:
        r"""
        Compute the profiled negative log-likelihood at fixed :math:`\mu` on Asimov data.

        This is an **optional fast-path override** for backends that can compute the
        Asimov likelihood more efficiently than the generic ``spey`` pathway (which
        first generates Asimov data via :func:`~spey.BackendBase.expected_data` and
        then calls the standard optimiser).

        Args:
            poi_test (``float``, default ``1.0``): Fixed value of the parameter of
              interest :math:`\mu`.
            expected (~spey.ExpectationType): Selects which dataset to condition on
              when constructing the Asimov dataset.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            test_statistics (``str``, default ``"qtilde"``): Test statistic that
              determines the signal strength used to generate the Asimov dataset
              (``"q0"`` → :math:`\mu=1`; all others → :math:`\mu=0`).

              * ``'qtilde'``: Alternative test statistic :math:`\tilde{q}_\mu`,
                eq. (62) of :xref:`1007.1727`.

                .. warning::

                    This test statistic assumes :math:`\hat\mu \geq 0`
                    (``allow_negative_signal=False``). When called through ``spey``'s
                    public interface this constraint is enforced automatically.

              * ``'q'``: Standard test statistic :math:`q_\mu`,
                eq. (54) of :xref:`1007.1727`.
              * ``'q0'``: Discovery test statistic :math:`q_0`,
                eq. (47) of :xref:`1007.1727`.

            kwargs: Additional keyword arguments forwarded to the backend's internal
              optimiser.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented this method.

        Returns:
            ``Tuple[float, np.ndarray]``:
            A tuple ``(nll, pars)`` where ``nll`` is the profiled negative
            log-likelihood evaluated on the Asimov dataset and ``pars`` is the 1-D
            array of all fit parameters at the optimum.
        """
        raise NotImplementedError("This method has not been implemented")

    def minimize_negative_loglikelihood(
        self,
        expected: ExpectationType = ExpectationType.observed,
        allow_negative_signal: bool = True,
        **kwargs,
    ) -> Tuple[float, np.ndarray]:
        r"""
        Find the global minimum of the negative log-likelihood (free fit).

        This is an **optional fast-path override**. ``spey`` first tries to call this
        method; if it raises :obj:`NotImplementedError`, the interface falls back to
        minimising the objective function from
        :func:`~spey.BackendBase.get_objective_function` using the built-in optimiser.
        Implement this method when the backend has a more efficient internal minimiser.

        Args:
            expected (~spey.ExpectationType): Selects which dataset to condition on.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            allow_negative_signal (``bool``, default ``True``): When ``True``,
              :math:`\hat\mu` is unconstrained; when ``False`` the fit enforces
              :math:`\hat\mu \geq 0`.
            kwargs: Additional keyword arguments forwarded to the backend's internal
              optimiser.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented this method.

        Returns:
            ``Tuple[float, np.ndarray]``:
            A tuple ``(nll, pars)`` where ``nll`` is the minimum negative
            log-likelihood :math:`-\log\mathcal{L}(\hat\mu, \hat\theta)` and ``pars``
            is the 1-D array of all fit parameters at the global optimum.
        """
        raise NotImplementedError("This method has not been implemented")

    def minimize_asimov_negative_loglikelihood(
        self,
        expected: ExpectationType = ExpectationType.observed,
        test_statistics: str = "qtilde",
        **kwargs,
    ) -> Tuple[float, np.ndarray]:
        r"""
        Find the global minimum of the negative log-likelihood on Asimov data (free fit).

        This is an **optional fast-path override** complementing
        :func:`~spey.BackendBase.asimov_negative_loglikelihood`. Together they allow
        the asymptotic calculator to bypass ``spey``'s generic optimisation loop.
        If this method raises :obj:`NotImplementedError`, the interface falls back to
        the standard pipeline.

        Args:
            expected (~spey.ExpectationType): Selects which dataset to condition on
              when constructing the Asimov dataset.

              * :obj:`~spey.ExpectationType.observed`: Use the observed data
                (post-fit, default).
              * :obj:`~spey.ExpectationType.aposteriori`: Use the observed data with
                post-fit nuisance treatment.
              * :obj:`~spey.ExpectationType.apriori`: Use the background-only
                prediction (pre-fit / SM hypothesis).

            test_statistics (``str``, default ``"qtilde"``): Test statistic that
              determines the signal strength used to generate the Asimov dataset
              (``"q0"`` → :math:`\mu=1`; all others → :math:`\mu=0`).

              * ``'qtilde'``: Alternative test statistic :math:`\tilde{q}_\mu`,
                eq. (62) of :xref:`1007.1727`.

                .. warning::

                    This test statistic assumes :math:`\hat\mu \geq 0`
                    (``allow_negative_signal=False``). When called through ``spey``'s
                    public interface this constraint is enforced automatically.

              * ``'q'``: Standard test statistic :math:`q_\mu`,
                eq. (54) of :xref:`1007.1727`.
              * ``'q0'``: Discovery test statistic :math:`q_0`,
                eq. (47) of :xref:`1007.1727`.

            kwargs: Additional keyword arguments forwarded to the backend's internal
              optimiser.

        Raises:
            :obj:`NotImplementedError`: If the backend has not implemented this method.

        Returns:
            ``Tuple[float, np.ndarray]``:
            A tuple ``(nll, pars)`` where ``nll`` is the minimum negative
            log-likelihood on the Asimov dataset and ``pars`` is the 1-D array of
            all fit parameters at the global optimum.
        """
        raise NotImplementedError("This method has not been implemented")
class ConverterBase(ABC):
    """
    Abstract base class for objects that convert one statistical model into another.

    A ``ConverterBase`` subclass acts as a stateless callable that accepts a
    :class:`~spey.StatisticalModel` (or any other representation) and returns a new
    :class:`~spey.BackendBase` instance. This is useful for translating between
    different likelihood prescriptions without exposing construction details to the
    user.

    Subclasses must expose the same class-level metadata as :class:`~spey.BackendBase`
    (``name``, ``version``, ``author``, ``spey_requires``) so that the plugin registry
    can identify them, and must override :func:`__call__` to perform the actual
    conversion.

    .. note::

        ``ConverterBase`` subclasses are **not** expected to accept arguments in
        ``__init__``. All conversion logic should live in :func:`__call__`.

    Example:

    .. code-block:: python

        import spey
        from spey.base.backend_base import BackendBase, ConverterBase

        class MyStatConverter(ConverterBase):
            name = "example.converter"
            version = "0.0.1"
            author = "Tom Bombadil"
            spey_requires = ">=0.1.0"

            def __call__(self, stat_model: spey.StatisticalModel) -> BackendBase:
                # Extract information from the input model and build a new backend
                signal = stat_model.backend._signal
                background = stat_model.backend._background
                data = stat_model.backend._data
                # ``UncorrelatedBackground`` stands in for the target backend
                # class; it must be imported or defined by the converter module.
                return UncorrelatedBackground(signal, background, data)
    """

    def __call__(self, *args, **kwargs) -> BackendBase:
        """
        Convert the input representation into a :class:`~spey.BackendBase` object.

        Subclasses **must** override this method. It may accept any positional or
        keyword arguments that are meaningful for the specific conversion (e.g. a
        :class:`~spey.StatisticalModel`, a workspace dictionary, or raw arrays).

        Raises:
            :obj:`NotImplementedError`: If the subclass has not implemented
              :func:`__call__`.

        Returns:
            :obj:`~spey.BackendBase`:
            A new backend instance compatible with the ``spey`` interface.
        """
        # Reaching this body means the subclass did not provide its own __call__.
        message = "Invalid implementation of ConverterBase object"
        raise NotImplementedError(message)