Source code for spey.backends.default_pdf.simple_pdf

"""This file contains basic likelihood implementations"""

from typing import Callable, List, Optional, Tuple, Union

from autograd import hessian, jacobian
from autograd import numpy as np
from autograd import value_and_grad
from scipy.optimize import NonlinearConstraint

from spey._version import __version__
from spey.backends.distributions import MainModel
from spey.base import BackendBase, ModelConfig
from spey.system.exceptions import InvalidInput
from spey.utils import ExpectationType

# pylint: disable=E1101,E1120,W0613


class SimplePDFBase(BackendBase):
    """Base structure for simple PDFs"""

    name: str = "__simplepdf_base__"
    """Name of the backend"""
    version: str = __version__
    """Version of the backend"""
    author: str = "SpeysideHEP"
    """Author of the backend"""
    spey_requires: str = __version__
    """Spey version required for the backend"""

    __slots__ = [
        "data",
        "signal_yields",
        "background_yields",
        "_main_model",
        "_main_kwargs",
        "_config",
    ]

    def __init__(
        self,
        signal_yields: List[float],
        background_yields: List[float],
        data: List[int],
    ):
        self.data = np.array(data, dtype=np.float64)
        self.signal_yields = np.array(signal_yields, dtype=np.float64)
        self.background_yields = np.array(background_yields, dtype=np.float64)
        self._main_model = None
        """main model"""
        self._main_kwargs = {}
        """Keyword arguments for main model"""
        self.constraints = []
        """Constraints to be used during optimisation process"""

        minimum_poi = -np.inf
        if self.is_alive:
            minimum_poi = -np.min(
                self.background_yields[self.signal_yields > 0.0]
                / self.signal_yields[self.signal_yields > 0.0]
            )

        self._config = ModelConfig(
            poi_index=0,
            minimum_poi=minimum_poi,
            suggested_init=[1.0],
            suggested_bounds=[(minimum_poi, 10)],
        )

    @property
    def is_alive(self) -> bool:
        """Returns True if at least one bin has non-zero signal yield."""
        return np.any(self.signal_yields > 0.0)

    def config(
        self, allow_negative_signal: bool = True, poi_upper_bound: float = 10.0
    ) -> ModelConfig:
        r"""
        Model configuration.

        Args:
            allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
              value will be allowed to be negative.
            poi_upper_bound (``float``, default ``40.0``): upper bound for parameter
              of interest, :math:`\mu`.

        Returns:
            ~spey.base.ModelConfig:
            Model configuration. Information regarding the position of POI in
            parameter list, suggested input and bounds.
        """
        if allow_negative_signal and poi_upper_bound == 10.0:
            return self._config

        return ModelConfig(
            self._config.poi_index,
            self._config.minimum_poi,
            self._config.suggested_init,
            [(0, poi_upper_bound)],
        )

    @property
    def main_model(self) -> MainModel:
        """retreive the main model distribution"""
        if self._main_model is None:

            def lam(pars: np.ndarray) -> np.ndarray:
                """
                Compute lambda for Main model with third moment expansion.
                For details see above eq 2.6 in :xref:`1809.05548`

                Args:
                    pars (``np.ndarray``): nuisance parameters

                Returns:
                    ``np.ndarray``:
                    expectation value of the poisson distribution with respect to
                    nuisance parameters.
                """
                return pars[0] * self.signal_yields + self.background_yields

            self._main_model = MainModel(lam, **self._main_kwargs)

        return self._main_model

    def get_objective_function(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[np.ndarray] = None,
        do_grad: bool = True,
    ) -> Callable[[np.ndarray], Union[Tuple[float, np.ndarray], float]]:
        r"""
        Objective function i.e. negative log-likelihood, :math:`-\log\mathcal{L}(\mu, \theta)`

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.
            data (``np.ndarray``, default ``None``): input data that to fit
            do_grad (``bool``, default ``True``): If ``True`` return objective and its gradient
              as ``tuple`` if ``False`` only returns objective function.

        Returns:
            ``Callable[[np.ndarray], Union[float, Tuple[float, np.ndarray]]]``:
            Function which takes fit parameters (:math:`\mu` and :math:`\theta`) and returns either
            objective or objective and its gradient.
        """
        current_data = (
            self.background_yields if expected == ExpectationType.apriori else self.data
        )
        data = current_data if data is None else data

        def negative_loglikelihood(pars: np.ndarray) -> np.ndarray:
            """Compute twice negative log-likelihood"""
            return -self.main_model.log_prob(pars, data)

        if do_grad:
            return value_and_grad(negative_loglikelihood, argnum=0)

        return negative_loglikelihood

    def get_logpdf_func(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[np.array] = None,
    ) -> Callable[[np.ndarray, np.ndarray], float]:
        r"""
        Generate function to compute :math:`\log\mathcal{L}(\mu, \theta)` where :math:`\mu` is the
        parameter of interest and :math:`\theta` are nuisance parameters.

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.
            data (``np.array``, default ``None``): input data that to fit

        Returns:
            ``Callable[[np.ndarray], float]``:
            Function that takes fit parameters (:math:`\mu` and :math:`\theta`) and computes
            :math:`\log\mathcal{L}(\mu, \theta)`.
        """
        current_data = (
            self.background_yields if expected == ExpectationType.apriori else self.data
        )
        data = current_data if data is None else data

        return lambda pars: self.main_model.log_prob(pars, data)

    def get_hessian_logpdf_func(
        self,
        expected: ExpectationType = ExpectationType.observed,
        data: Optional[np.ndarray] = None,
    ) -> Callable[[np.ndarray], float]:
        r"""
        Currently Hessian of :math:`\log\mathcal{L}(\mu, \theta)` is only used to compute
        variance on :math:`\mu`. This method returns a callable function which takes fit
        parameters (:math:`\mu` and :math:`\theta`) and returns Hessian.

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.
            data (``np.ndarray``, default ``None``): input data that to fit

        Returns:
            ``Callable[[np.ndarray], float]``:
            Function that takes fit parameters (:math:`\mu` and :math:`\theta`) and
            returns Hessian of :math:`\log\mathcal{L}(\mu, \theta)`.
        """
        current_data = (
            self.background_yields if expected == ExpectationType.apriori else self.data
        )
        data = current_data if data is None else data

        def log_prob(pars: np.ndarray) -> np.ndarray:
            """Compute log-probability"""
            return self.main_model.log_prob(pars, data)

        return hessian(log_prob, argnum=0)

    def get_sampler(self, pars: np.ndarray) -> Callable[[int], np.ndarray]:
        r"""
        Retreives the function to sample from.

        Args:
            pars (``np.ndarray``): fit parameters (:math:`\mu` and :math:`\theta`)
            include_auxiliary (``bool``): wether or not to include auxiliary data
              coming from the constraint model.

        Returns:
            ``Callable[[int, bool], np.ndarray]``:
            Function that takes ``number_of_samples`` as input and draws as many samples
            from the statistical model.
        """

        def sampler(sample_size: int, *args, **kwargs) -> np.ndarray:
            """
            Fucntion to generate samples.

            Args:
                sample_size (``int``): number of samples to be generated.

            Returns:
                ``np.ndarray``:
                generated samples
            """
            return self.main_model.sample(pars, sample_size)

        return sampler

    def expected_data(self, pars: List[float], **kwargs) -> List[float]:
        r"""
        Compute the expected value of the statistical model

        Args:
            pars (``List[float]``): nuisance, :math:`\theta` and parameter of interest,

        Returns:
            ``List[float]``:
            Expected data of the statistical model
        """
        return self.main_model.expected_data(pars)


[docs] class Poisson(SimplePDFBase): r""" Poisson distribution without uncertainty implementation. .. math:: \mathcal{L}(\mu) = \prod_{i\in{\rm bins}}{\rm Poiss}(n^i|\mu n_s^i + n_b^i) where :math:`n_{s,b}` are signal and background yields and :math:`n` are the observations. Args: signal_yields (``List[float]``): signal yields background_yields (``List[float]``): background yields data (``List[int]``): data absolute_uncertainties (``List[float]``, optional): background uncertainties to be added. :math:`\mu n_s^i + n_b^i + \theta^i\sigma^i`. """ name: str = "default.poisson" """Name of the backend""" version: str = SimplePDFBase.version """Version of the backend""" author: str = SimplePDFBase.author """Author of the backend""" spey_requires: str = SimplePDFBase.spey_requires """Spey version required for the backend"""
[docs] def __init__( self, signal_yields: List[float], background_yields: List[float], data: List[int], absolute_uncertainties: Optional[List[float]] = None, ): super().__init__( signal_yields=signal_yields, background_yields=background_yields, data=data ) if absolute_uncertainties is not None: self.absolute_uncertainties = np.array(absolute_uncertainties) assert len(self.absolute_uncertainties) == len( self.data ), "Invalid input dimension." def lam(pars: np.ndarray) -> np.ndarray: return ( pars[0] * self.signal_yields + self.background_yields + pars[slice(1, len(self.data) + 1)] * self.absolute_uncertainties ) self._main_model = MainModel(lam) self._config = ModelConfig( poi_index=0, minimum_poi=self._config.minimum_poi, suggested_init=[1.0] * (len(self.data) + 1), suggested_bounds=[(self._config.minimum_poi, 10)] + [(None, None)] * len(self.data), ) def constraint(pars: np.ndarray) -> np.ndarray: """Compute the constraint term""" return ( self.background_yields + pars[slice(1, len(self.data) + 1)] * self.background_yields ) jac_constr = jacobian(constraint) self.constraints.append( NonlinearConstraint(constraint, 0.0, np.inf, jac=jac_constr) )
[docs] class Gaussian(SimplePDFBase): r""" Gaussian distribution for uncorrelated likelihoods. .. math:: \mathcal{L}(\mu) = \prod_{i\in{\rm bins}} \frac{1}{\sigma^i \sqrt{2\pi}} \exp\left[-\frac{1}{2} \left(\frac{\mu n_s^i + n_b^i - n^i}{\sigma^i} \right)^2 \right] where :math:`n_{s,b}` are signal and background yields and :math:`n` are the observations. .. versionadded:: 0.1.9 Args: signal_yields (``List[float]``): signal yields background_yields (``List[float]``): background yields data (``List[int]``): data absolute_uncertainties (``List[float]``): absolute uncertainties on the background """ name: str = "default.normal" """Name of the backend""" version: str = SimplePDFBase.version """Version of the backend""" author: str = SimplePDFBase.author """Author of the backend""" spey_requires: str = SimplePDFBase.spey_requires """Spey version required for the backend""" __slots__ = ["absolute_uncertainties"]
[docs] def __init__( self, signal_yields: List[float], background_yields: List[float], data: List[int], absolute_uncertainties: List[float], ): super().__init__( signal_yields=signal_yields, background_yields=background_yields, data=data ) self.absolute_uncertainties = np.array(absolute_uncertainties, dtype=np.float64) """absolute uncertainties on the background""" self._main_kwargs = {"cov": self.absolute_uncertainties, "pdf_type": "gauss"}
[docs] class MultivariateNormal(SimplePDFBase): r""" Multivariate Gaussian distribution. .. math:: \mathcal{L}(\mu) = \frac{1}{\sqrt{(2\pi)^k {\rm det}[\Sigma] }} \exp\left[-\frac{1}{2} (\mu n_s + n_b - n)\Sigma^{-1} (\mu n_s + n_b - n)^T \right] where :math:`n_{s,b}` are signal and background yields and :math:`n` are the observations. .. versionadded:: 0.1.9 ``covariance_matrix`` can also take callable function as an input where function takes nuisance parameters as inputs and return a new covariance matrix as output. **Example:** .. code:: python3 >>> import spey >>> import numpy as np >>> signal_yields = np.array([12.0, 15.0]) >>> background_yields = np.array([50.0, 48.0]) >>> data = np.array([36., 33.]) >>> covariance_matrix = np.array([[144.0, 13.0], [25.0, 256.0]]) >>> covariance_signal = np.array([[5.0, 1.0], [2.0, 3.0]]) >>> def cov_matrix(pars: np.ndarray) -> np.ndarray: >>> return covariance_matrix + covariance_signal * pars[0]**2 >>> pdf_wrapper = spey.get_backend('default.multivariate_normal') >>> model = pdf_wrapper( ... signal_yields=signal_yields, ... background_yields=background_yields, ... data=data, ... covariance_matrix=cov_matrix, ... ) .. versionchanged:: 0.2.6 The ability to input a callable covariance matrix has been added. Args: signal_yields (``List[float]``): signal yields background_yields (``List[float]``): background yields data (``List[int]``): data covariance_matrix (``List[List[float]] | callable``): covariance matrix (square matrix) * If you have correlation matrix and absolute uncertainties please use :func:`~spey.helper_functions.correlation_to_covariance` """ name: str = "default.multivariate_normal" """Name of the backend""" version: str = SimplePDFBase.version """Version of the backend""" author: str = SimplePDFBase.author """Author of the backend""" spey_requires: str = SimplePDFBase.spey_requires """Spey version required for the backend""" __slots__ = ["covariance_matrix"]
[docs] def __init__( self, signal_yields: List[float], background_yields: List[float], data: List[int], covariance_matrix: Union[List[List[float]], callable], ): super().__init__( signal_yields=signal_yields, background_yields=background_yields, data=data ) if not callable(covariance_matrix): self.covariance_matrix = np.array(covariance_matrix, dtype=np.float64) if ( self.covariance_matrix.shape[0] != len(self.background_yields) and len(self.covariance_matrix.shape) == 2 ): raise InvalidInput( "Dimensionality of the covariance matrix should match to the background" ) else: self.covariance_matrix = covariance_matrix self._main_kwargs = { "cov": self.covariance_matrix, "pdf_type": "multivariategauss", }