Source code for spey.combiner.uncorrelated_statistics_combiner

"""
Statistical Model combiner class: this class combines likelihoods
of different statistical models for hypothesis testing
"""

import logging
import warnings
from typing import Dict, Iterator, List, Optional, Tuple, Union

import numpy as np

from spey.base.hypotest_base import HypothesisTestingBase
from spey.base.model_config import ModelConfig
from spey.interface.statistical_model import StatisticalModel
from spey.optimizer.core import fit
from spey.system.exceptions import AnalysisQueryError, NegativeExpectedYields
from spey.utils import ExpectationType

__all__ = ["UnCorrStatisticsCombiner"]

log = logging.getLogger("Spey")



[docs]
class UnCorrStatisticsCombiner(HypothesisTestingBase):
    """
    Module to combine **uncorrelated** statistical models. It takes serries of
    :obj:`~spey.StatisticalModel` object as input. These statistical models does not
    need to have same backend. However, this class assumes that all the input
    statistical model's are completely independent from each other.

    .. warning::

        :obj:`~spey.UnCorrStatisticsCombiner` assumes that all input are uncorrelated and
        non of the statistical models posesses the same set of nuisance parameters. Thus
        each statistical model is optimised independently.

    Args:
        input arguments (:obj:`~spey.StatisticalModel`): Uncorrelated statistical models
        ntoys (``int``, default ``1000``): Number of toy samples for hypothesis testing.
          (Only used for toy-based hypothesis testing)

    Raises:
        :obj:`~spey.system.exceptions.AnalysisQueryError`: If multiple :class:`~spey.StatisticalModel`
          has the same :attr:`~spey.StatisticalModel.analysis` attribute.
        :obj:`TypeError`: If the input type is not :class:`~spey.StatisticalModel`.
    """

    __slots__ = ["_statistical_models"]

    def __init__(self, *args, ntoys: int = 1000):
        super().__init__(ntoys=ntoys)
        self._statistical_models = []
        for arg in args:
            self.append(arg)


[docs]
    def append(self, statistical_model: StatisticalModel) -> None:
        """
        Append new independent :class:`~spey.StatisticalModel` to the stack.

        Args:
            statistical_model (:class:`~spey.StatisticalModel`): new statistical model
              to be added to the stack.

        Raises:
            :obj:`~spey.system.exceptions.AnalysisQueryError`: If multiple :class:`~spey.StatisticalModel`
              has the same :attr:`~spey.StatisticalModel.analysis` attribute.
            :obj:`TypeError`: If the input type is not :class:`~spey.StatisticalModel`.
        """
        if isinstance(statistical_model, StatisticalModel):
            if statistical_model.analysis in self.analyses:
                raise AnalysisQueryError(f"{statistical_model.analysis} already exists.")
            self._statistical_models.append(statistical_model)
        else:
            raise TypeError(f"Can not append type {type(statistical_model)}.")



[docs]
    def remove(self, analysis: str) -> None:
        """
        Remove an analysis from the stack.

        Args:
            analysis (``Text``): unique identifier of the analysis to be removed.

        Raises:
            :obj:`~spey.system.exceptions.AnalysisQueryError`: If the unique identifier does not match
              any of the statistical models in the stack.
        """
        to_remove = None
        for name, model in self.items():
            if name == analysis:
                to_remove = model
        if to_remove is None:
            raise AnalysisQueryError(f"'{analysis}' is not among the analyses.")
        self._statistical_models.remove(to_remove)


    @property
    def statistical_models(self) -> Tuple[StatisticalModel]:
        """
        Accessor to the statistical model stack

        Returns:
            ``Tuple[StatisticalModel]``:
            statistical model stack as an ntuple.
        """
        return tuple(self._statistical_models)

    @property
    def analyses(self) -> List[str]:
        """
        List of the unique identifiers of the statistical models within the stack.

        Returns:
            ``List[Text]``:
            List of analysis names.
        """
        return [model.analysis for model in self]

    @property
    def minimum_poi(self) -> float:
        r"""
        Find the maximum minimum poi, :math:`\mu` value within the analysis stack.

        Returns:
            ``float``:
            minimum poi value that the combined stack can take.
        """
        return max(model.backend.config().minimum_poi for model in self)

    @property
    def is_alive(self) -> bool:
        """
        Returns true if there is at least one statistical model with at least
        one non zero bin with signal yield.

        Returns:
            ``bool``
        """
        return any(model.is_alive for model in self)

    @property
    def is_asymptotic_calculator_available(self) -> bool:
        """Check if Asymptotic calculator is available for the backend"""
        return all(model.is_asymptotic_calculator_available for model in self)

    @property
    def is_toy_calculator_available(self) -> bool:
        """Check if Toy calculator is available for the backend"""
        return False

    @property
    def is_chi_square_calculator_available(self) -> bool:
        r"""Check if :math:`\chi^2` calculator is available for the backend"""
        return all(model.is_chi_square_calculator_available for model in self)

    def __getitem__(self, item: Union[str, int]) -> StatisticalModel:
        """Retrieve a statistical model"""
        if isinstance(item, int):
            if item < len(self):
                return self.statistical_models[item]
            raise AnalysisQueryError(
                "Request exceeds number of statistical models available."
            )
        if isinstance(item, slice):
            return self.statistical_models[item]

        for model in self:
            if model.analysis == item:
                return model
        raise AnalysisQueryError(f"'{item}' is not among the analyses.")

    def __iter__(self) -> StatisticalModel:
        """Iterate over statistical models"""
        yield from self._statistical_models

    def __len__(self):
        """Number of statistical models within the stack"""
        return len(self._statistical_models)


[docs]
    def items(self) -> Iterator[Tuple[str, StatisticalModel]]:
        """
        Returns a generator that returns analysis name and :obj:`~spey.StatisticalModel`
        every iteration.

        Returns:
            ``Iterator[Tuple[Text, StatisticalModel]]``
        """
        return ((model.analysis, model) for model in self)



[docs]
    def find_most_sensitive(self) -> StatisticalModel:
        """
        If the cross sections are defined, finds the statistical model with minimum prefit
        excluded cross section value. See :attr:`~spey.StatisticalModel.s95exp`.

        Returns:
            :obj:`~spey.StatisticalModel`:
            Statistical model with minimum prefit excluded cross section value.
        """
        return self[np.argmin([model.s95exp for model in self])]



[docs]
    def likelihood(
        self,
        poi_test: float = 1.0,
        expected: ExpectationType = ExpectationType.observed,
        return_nll: bool = True,
        data: Optional[Dict[str, List[float]]] = None,
        statistical_model_options: Optional[Dict[str, Dict]] = None,
        **kwargs,
    ) -> float:
        r"""
        Compute the likelihood of the statistical model stack at a fixed parameter of interest

        Args:
            poi_test (``float``, default ``1.0``): parameter of interest or signal strength,
              :math:`\mu`.
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

            return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
              if ``False`` returns likelihood value.
            data (``Dict[Text, List[float]]``, default ``None``): Input data to be used for fit. It needs
              to be given as a dictionary where the key argument is the name of the analysis and item is
              the statistical model specific data.
            statistical_model_options (``Optional[Dict[Text, Dict]]``, default ``None``): backend specific
              options. The dictionary key needs to be the backend name and the item needs to be the dictionary
              holding the keyword arguments specific to that particular backend.

              .. code-block:: python3

                >>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}

            kwargs: keyword arguments for the optimiser.

        Returns:
            ``float``:
            value of the likelihood of the stacked statistical model.
        """
        statistical_model_options = statistical_model_options or {}
        data = data or {}

        nll = 0.0
        for statistical_model in self:

            current_kwargs = {}
            current_kwargs.update(
                statistical_model_options.get(str(statistical_model.backend_type), {})
            )
            current_data = data.get(statistical_model.analysis, None)

            try:
                nll += statistical_model.likelihood(
                    poi_test=poi_test,
                    expected=expected,
                    data=current_data,
                    **current_kwargs,
                    **kwargs,
                )
            except NegativeExpectedYields as err:
                warnings.warn(
                    err.args[0] + f"\nSetting NLL({poi_test:.3f}) = nan",
                    category=RuntimeWarning,
                )
                nll = np.nan

            if np.isnan(nll):
                break

        return nll if return_nll or np.isnan(nll) else np.exp(-nll)



[docs]
    def generate_asimov_data(
        self,
        expected: ExpectationType = ExpectationType.observed,
        test_statistic: str = "qtilde",
        statistical_model_options: Optional[Dict[str, Dict]] = None,
        **kwargs,
    ) -> Dict[str, List[float]]:
        r"""
        Generate Asimov data for the statistical model. This function calls
        :func:`~spey.StatisticalModel.generate_asimov_data` function for each statistical model with
        appropriate ``statistical_model_options`` and generates Asimov data for each statistical
        model independently.

        Args:
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

            test_statistic (``Text``, default ``"qtilde"``): test statistics.

              * ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
                :math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
                (:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).

                .. warning::

                    Note that this assumes that :math:`\hat\mu\geq0`, hence ``allow_negative_signal``
                    assumed to be ``False``. If this function has been executed by user, ``spey``
                    assumes that this is taken care of throughout the external code consistently.
                    Whilst computing p-values or upper limit on :math:`\mu` through ``spey`` this
                    is taken care of automatically in the backend.

              * ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
                eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
              * ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
                of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).

            statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
              options. The dictionary key needs to be the backend name and the item needs to be the dictionary
              holding the keyword arguments specific to that particular backend.

              .. code-block:: python3

                >>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}

            kwargs: keyword arguments for the optimiser.

        Returns:
            ``Dict[Text, List[float]]``:
            Returns a dictionary for data specific to each analysis. keywords will be analysis names
            and the items are data.
        """
        statistical_model_options = statistical_model_options or {}

        data = {}
        for statistical_model in self:
            current_kwargs = {}
            current_kwargs.update(
                statistical_model_options.get(str(statistical_model.backend_type), {})
            )

            data.update(
                {
                    statistical_model.analysis: statistical_model.generate_asimov_data(
                        expected=expected,
                        test_statistic=test_statistic,
                        **current_kwargs,
                        **kwargs,
                    )
                }
            )

        return data



[docs]
    def asimov_likelihood(
        self,
        poi_test: float = 1.0,
        expected: ExpectationType = ExpectationType.observed,
        return_nll: bool = True,
        test_statistics: str = "qtilde",
        statistical_model_options: Optional[Dict[str, Dict]] = None,
        **kwargs,
    ) -> float:
        r"""
        Compute likelihood of the statistical model stack generated with the Asimov data.

        Args:
            poi_test (``float``, default ``1.0``): parameter of interest, :math:`\mu`.
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

            return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
              if ``False`` returns likelihood value.
            test_statistics (``Text``, default ``"qtilde"``): test statistics.

              * ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
                :math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
                (:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).

                .. warning::

                    Note that this assumes that :math:`\hat\mu\geq0`, hence ``allow_negative_signal``
                    assumed to be ``False``. If this function has been executed by user, ``spey``
                    assumes that this is taken care of throughout the external code consistently.
                    Whilst computing p-values or upper limit on :math:`\mu` through ``spey`` this
                    is taken care of automatically in the backend.

              * ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
                eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
              * ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
                of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).

            statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
              options. The dictionary key needs to be the backend name and the item needs to be the dictionary
              holding the keyword arguments specific to that particular backend.

              .. code-block:: python3

                >>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}

            kwargs: keyword arguments for the optimiser.

        Returns:
            ``float``:
            likelihood computed for asimov data
        """
        return self.likelihood(
            poi_test=poi_test,
            expected=expected,
            return_nll=return_nll,
            data=self.generate_asimov_data(
                expected=expected,
                test_statistic=test_statistics,
                statistical_model_options=statistical_model_options,
                **kwargs,
            ),
            statistical_model_options=statistical_model_options,
            **kwargs,
        )



[docs]
    def maximize_likelihood(
        self,
        return_nll: bool = True,
        expected: ExpectationType = ExpectationType.observed,
        allow_negative_signal: bool = True,
        data: Optional[Dict[str, List[float]]] = None,
        initial_muhat_value: Optional[float] = None,
        par_bounds: Optional[List[Tuple[float, float]]] = None,
        statistical_model_options: Optional[Dict[str, Dict]] = None,
        **optimiser_options,
    ) -> Tuple[float, float]:
        r"""
        Find the maximum of the likelihood.

        Args:
            return_nll (``bool``, default ``True``): If  ``True``, returns negative log-likelihood value.
              if ``False`` returns likelihood value.
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

            allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
              value will be allowed to be negative.
            data (``Dict[Text, List[float]]``, default ``None``): Input data to be used for fit. It needs
              to be given as a dictionary where the key argument is the name of the analysis and item is
              the statistical model specific data.
            initial_muhat_value (``float``, default ``None``): Initialisation for the :math:`\hat\mu` for
              the optimiser. If ``None`` the initial value will be estimated by weighted combination of
              :math:`\hat\mu_i` where :math:`i` stands for the statistical models within the stack.

              .. math::

                    \mu_{\rm init} = \frac{1}{\sum_i \sigma_{\hat\mu, i}^2} \sum_i \frac{\hat\mu_i}{\sigma_{\hat\mu, i}^2}

              where the value of :math:`\sigma_{\hat\mu}` has been estimated by
              :func:`~spey.base.hypotest_base.HypothesisTestingBase.sigma_mu` function.
            par_bounds (``List[Tuple[float, float]]``, default  ``None``): parameter bounds for
              the optimiser.
            statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
              options. The dictionary key needs to be the backend name and the item needs to be the dictionary
              holding the keyword arguments specific to that particular backend.

              .. code-block:: python3

                >>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}

            kwargs: keyword arguments for the optimiser.

        Returns:
            ``Tuple[float, float]``:
            :math:`\hat\mu` value and maximum value of the likelihood.
        """
        statistical_model_options = statistical_model_options or {}
        data = data or {}

        # muhat initial value estimation in gaussian limit
        mu_init = initial_muhat_value or 0.0
        if initial_muhat_value is None:
            try:
                _mu, _sigma_mu = np.zeros(len(self)), np.ones(len(self))
                for idx, stat_model in enumerate(self):

                    current_kwargs = {}
                    current_kwargs.update(
                        statistical_model_options.get(str(stat_model.backend_type), {})
                    )

                    _mu[idx] = stat_model.maximize_likelihood(
                        expected=expected, **current_kwargs, **optimiser_options
                    )[0]
                    _sigma_mu[idx] = stat_model.sigma_mu(
                        poi_test=_mu[idx],
                        expected=expected,
                        **current_kwargs,
                        **optimiser_options,
                    )
                norm = np.sum(np.power(_sigma_mu, -2))
                mu_init = np.true_divide(1.0, norm) * np.sum(
                    np.true_divide(_mu, np.square(_sigma_mu))
                )
            except Exception as err:
                log.debug(str(err))
                mu_init = 0.0

        config: ModelConfig = ModelConfig(
            poi_index=0,
            minimum_poi=self.minimum_poi,
            suggested_init=[float(mu_init)],
            suggested_bounds=(
                par_bounds
                or [
                    (
                        self.minimum_poi if allow_negative_signal else 0.0,
                        max(10.0, mu_init),
                    )
                ]
            ),
        )

        def twice_nll(poi_test: Union[float, np.ndarray]) -> float:
            """Function to compute twice negative log-likelihood for a given poi test"""
            return 2.0 * self.likelihood(
                poi_test if isinstance(poi_test, float) else poi_test[0],
                expected=expected,
                return_nll=True,
                data=data,
                statistical_model_options=statistical_model_options,
                **optimiser_options,
            )

        twice_nll, fit_params = fit(
            func=twice_nll,
            model_configuration=config,
            **optimiser_options,
        )

        return fit_params[0], twice_nll / 2.0 if return_nll else np.exp(-twice_nll / 2.0)



[docs]
    def maximize_asimov_likelihood(
        self,
        return_nll: bool = True,
        expected: ExpectationType = ExpectationType.observed,
        test_statistics: str = "qtilde",
        initial_muhat_value: Optional[float] = None,
        par_bounds: Optional[List[Tuple[float, float]]] = None,
        statistical_model_options: Optional[Dict[str, Dict]] = None,
        **optimiser_options,
    ) -> Tuple[float, float]:
        r"""
        Find the maximum of the likelihood which computed with respect to Asimov data.

        Args:
            return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
              if ``False`` returns likelihood value.
            expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
              p-values to be computed.

              * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
                prescriotion which means that the experimental data will be assumed to be the truth
                (default).
              * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
                post-fit prescriotion which means that the experimental data will be assumed to be
                the truth.
              * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
                prescription which means that the SM will be assumed to be the truth.

            test_statistics (``Text``, default ``"qtilde"``): test statistic.

              * ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
                :math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
                (:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).

                .. warning::

                    Note that this assumes that :math:`\hat\mu\geq0`, hence :obj:`allow_negative_signal`
                    assumed to be :obj:`False`. If this function has been executed by user, :obj:`spey`
                    assumes that this is taken care of throughout the external code consistently.
                    Whilst computing p-values or upper limit on :math:`\mu` through :obj:`spey` this
                    is taken care of automatically in the backend.

              * ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
                eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
              * ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
                of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).

            initial_muhat_value (``float``, default ``None``): Initial value for :math:`\hat\mu`.
            par_bounds (``List[Tuple[float, float]]``, default ``None``): parameter bounds for
              the optimiser.
            statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
              options. The dictionary key needs to be the backend name and the item needs to be the dictionary
              holding the keyword arguments specific to that particular backend.

              .. code-block:: python3

                >>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}

            kwargs: keyword arguments for the optimiser.

        Returns:
            ``Tuple[float, float]``:
            :math:`\hat\mu` value and maximum value of the likelihood.
        """
        allow_negative_signal: bool = True if test_statistics in ["q", "qmu"] else False

        config: ModelConfig = ModelConfig(
            poi_index=0,
            minimum_poi=self.minimum_poi,
            suggested_init=[initial_muhat_value or 0.0],
            suggested_bounds=(
                par_bounds or [(self.minimum_poi if allow_negative_signal else 0.0, 10.0)]
            ),
        )

        statistical_model_options = statistical_model_options or {}

        data = self.generate_asimov_data(
            expected=expected,
            test_statistic=test_statistics,
            statistical_model_options=statistical_model_options,
            **optimiser_options,
        )

        def twice_nll(poi_test: Union[float, np.ndarray]) -> float:
            """Function to compute twice negative log-likelihood for a given poi test"""
            return 2.0 * self.likelihood(
                poi_test if isinstance(poi_test, float) else poi_test[0],
                expected=expected,
                data=data,
                statistical_model_options=statistical_model_options,
                **optimiser_options,
            )

        twice_nll, fit_params = fit(
            func=twice_nll,
            model_configuration=config,
            **optimiser_options,
        )

        return fit_params[0], twice_nll / 2.0 if return_nll else np.exp(-twice_nll / 2.0)


    def __matmul__(self, other: StatisticalModel):
        new_model = UnCorrStatisticsCombiner(*self._statistical_models)
        if isinstance(other, StatisticalModel):
            new_model.append(other)
        elif isinstance(other, UnCorrStatisticsCombiner):
            for model in other:
                new_model.append(model)
        else:
            raise ValueError(
                f"Can not combine type<{type(other)}> with UnCorrStatisticsCombiner"
            )
        return new_model