"""
Statistical Model combiner class: this class combines likelihoods
of different statistical models for hypothesis testing
"""
import logging
import warnings
from typing import Dict, Iterator, List, Optional, Tuple, Union
import numpy as np
from spey.base.hypotest_base import HypothesisTestingBase
from spey.base.model_config import ModelConfig
from spey.interface.statistical_model import StatisticalModel
from spey.optimizer.core import fit
from spey.system.exceptions import AnalysisQueryError, NegativeExpectedYields
from spey.utils import ExpectationType
__all__ = ["UnCorrStatisticsCombiner"]
log = logging.getLogger("Spey")
[docs]
class UnCorrStatisticsCombiner(HypothesisTestingBase):
"""
Module to combine **uncorrelated** statistical models. It takes serries of
:obj:`~spey.StatisticalModel` object as input. These statistical models does not
need to have same backend. However, this class assumes that all the input
statistical model's are completely independent from each other.
.. warning::
:obj:`~spey.UnCorrStatisticsCombiner` assumes that all input are uncorrelated and
non of the statistical models posesses the same set of nuisance parameters. Thus
each statistical model is optimised independently.
Args:
input arguments (:obj:`~spey.StatisticalModel`): Uncorrelated statistical models
ntoys (``int``, default ``1000``): Number of toy samples for hypothesis testing.
(Only used for toy-based hypothesis testing)
Raises:
:obj:`~spey.system.exceptions.AnalysisQueryError`: If multiple :class:`~spey.StatisticalModel`
has the same :attr:`~spey.StatisticalModel.analysis` attribute.
:obj:`TypeError`: If the input type is not :class:`~spey.StatisticalModel`.
"""
__slots__ = ["_statistical_models"]
def __init__(self, *args, ntoys: int = 1000):
super().__init__(ntoys=ntoys)
self._statistical_models = []
for arg in args:
self.append(arg)
[docs]
def append(self, statistical_model: StatisticalModel) -> None:
"""
Append new independent :class:`~spey.StatisticalModel` to the stack.
Args:
statistical_model (:class:`~spey.StatisticalModel`): new statistical model
to be added to the stack.
Raises:
:obj:`~spey.system.exceptions.AnalysisQueryError`: If multiple :class:`~spey.StatisticalModel`
has the same :attr:`~spey.StatisticalModel.analysis` attribute.
:obj:`TypeError`: If the input type is not :class:`~spey.StatisticalModel`.
"""
if isinstance(statistical_model, StatisticalModel):
if statistical_model.analysis in self.analyses:
raise AnalysisQueryError(f"{statistical_model.analysis} already exists.")
self._statistical_models.append(statistical_model)
else:
raise TypeError(f"Can not append type {type(statistical_model)}.")
[docs]
def remove(self, analysis: str) -> None:
"""
Remove an analysis from the stack.
Args:
analysis (``Text``): unique identifier of the analysis to be removed.
Raises:
:obj:`~spey.system.exceptions.AnalysisQueryError`: If the unique identifier does not match
any of the statistical models in the stack.
"""
to_remove = None
for name, model in self.items():
if name == analysis:
to_remove = model
if to_remove is None:
raise AnalysisQueryError(f"'{analysis}' is not among the analyses.")
self._statistical_models.remove(to_remove)
@property
def statistical_models(self) -> Tuple[StatisticalModel]:
"""
Accessor to the statistical model stack
Returns:
``Tuple[StatisticalModel]``:
statistical model stack as an ntuple.
"""
return tuple(self._statistical_models)
@property
def analyses(self) -> List[str]:
"""
List of the unique identifiers of the statistical models within the stack.
Returns:
``List[Text]``:
List of analysis names.
"""
return [model.analysis for model in self]
@property
def minimum_poi(self) -> float:
r"""
Find the maximum minimum poi, :math:`\mu` value within the analysis stack.
Returns:
``float``:
minimum poi value that the combined stack can take.
"""
return max(model.backend.config().minimum_poi for model in self)
@property
def is_alive(self) -> bool:
"""
Returns true if there is at least one statistical model with at least
one non zero bin with signal yield.
Returns:
``bool``
"""
return any(model.is_alive for model in self)
@property
def is_asymptotic_calculator_available(self) -> bool:
"""Check if Asymptotic calculator is available for the backend"""
return all(model.is_asymptotic_calculator_available for model in self)
@property
def is_toy_calculator_available(self) -> bool:
"""Check if Toy calculator is available for the backend"""
return False
@property
def is_chi_square_calculator_available(self) -> bool:
r"""Check if :math:`\chi^2` calculator is available for the backend"""
return all(model.is_chi_square_calculator_available for model in self)
def __getitem__(self, item: Union[str, int]) -> StatisticalModel:
"""Retrieve a statistical model"""
if isinstance(item, int):
if item < len(self):
return self.statistical_models[item]
raise AnalysisQueryError(
"Request exceeds number of statistical models available."
)
if isinstance(item, slice):
return self.statistical_models[item]
for model in self:
if model.analysis == item:
return model
raise AnalysisQueryError(f"'{item}' is not among the analyses.")
def __iter__(self) -> StatisticalModel:
"""Iterate over statistical models"""
yield from self._statistical_models
def __len__(self):
"""Number of statistical models within the stack"""
return len(self._statistical_models)
[docs]
def items(self) -> Iterator[Tuple[str, StatisticalModel]]:
"""
Returns a generator that returns analysis name and :obj:`~spey.StatisticalModel`
every iteration.
Returns:
``Iterator[Tuple[Text, StatisticalModel]]``
"""
return ((model.analysis, model) for model in self)
[docs]
def find_most_sensitive(self) -> StatisticalModel:
"""
If the cross sections are defined, finds the statistical model with minimum prefit
excluded cross section value. See :attr:`~spey.StatisticalModel.s95exp`.
Returns:
:obj:`~spey.StatisticalModel`:
Statistical model with minimum prefit excluded cross section value.
"""
return self[np.argmin([model.s95exp for model in self])]
[docs]
def likelihood(
self,
poi_test: float = 1.0,
expected: ExpectationType = ExpectationType.observed,
return_nll: bool = True,
data: Optional[Dict[str, List[float]]] = None,
statistical_model_options: Optional[Dict[str, Dict]] = None,
**kwargs,
) -> float:
r"""
Compute the likelihood of the statistical model stack at a fixed parameter of interest
Args:
poi_test (``float``, default ``1.0``): parameter of interest or signal strength,
:math:`\mu`.
expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
p-values to be computed.
* :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
prescriotion which means that the experimental data will be assumed to be the truth
(default).
* :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
post-fit prescriotion which means that the experimental data will be assumed to be
the truth.
* :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
prescription which means that the SM will be assumed to be the truth.
return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
if ``False`` returns likelihood value.
data (``Dict[Text, List[float]]``, default ``None``): Input data to be used for fit. It needs
to be given as a dictionary where the key argument is the name of the analysis and item is
the statistical model specific data.
statistical_model_options (``Optional[Dict[Text, Dict]]``, default ``None``): backend specific
options. The dictionary key needs to be the backend name and the item needs to be the dictionary
holding the keyword arguments specific to that particular backend.
.. code-block:: python3
>>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}
kwargs: keyword arguments for the optimiser.
Returns:
``float``:
value of the likelihood of the stacked statistical model.
"""
statistical_model_options = statistical_model_options or {}
data = data or {}
nll = 0.0
for statistical_model in self:
current_kwargs = {}
current_kwargs.update(
statistical_model_options.get(str(statistical_model.backend_type), {})
)
current_data = data.get(statistical_model.analysis, None)
try:
nll += statistical_model.likelihood(
poi_test=poi_test,
expected=expected,
data=current_data,
**current_kwargs,
**kwargs,
)
except NegativeExpectedYields as err:
warnings.warn(
err.args[0] + f"\nSetting NLL({poi_test:.3f}) = nan",
category=RuntimeWarning,
)
nll = np.nan
if np.isnan(nll):
break
return nll if return_nll or np.isnan(nll) else np.exp(-nll)
[docs]
def generate_asimov_data(
self,
expected: ExpectationType = ExpectationType.observed,
test_statistic: str = "qtilde",
statistical_model_options: Optional[Dict[str, Dict]] = None,
**kwargs,
) -> Dict[str, List[float]]:
r"""
Generate Asimov data for the statistical model. This function calls
:func:`~spey.StatisticalModel.generate_asimov_data` function for each statistical model with
appropriate ``statistical_model_options`` and generates Asimov data for each statistical
model independently.
Args:
expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
p-values to be computed.
* :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
prescriotion which means that the experimental data will be assumed to be the truth
(default).
* :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
post-fit prescriotion which means that the experimental data will be assumed to be
the truth.
* :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
prescription which means that the SM will be assumed to be the truth.
test_statistic (``Text``, default ``"qtilde"``): test statistics.
* ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
:math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
(:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).
.. warning::
Note that this assumes that :math:`\hat\mu\geq0`, hence ``allow_negative_signal``
assumed to be ``False``. If this function has been executed by user, ``spey``
assumes that this is taken care of throughout the external code consistently.
Whilst computing p-values or upper limit on :math:`\mu` through ``spey`` this
is taken care of automatically in the backend.
* ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
* ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).
statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
options. The dictionary key needs to be the backend name and the item needs to be the dictionary
holding the keyword arguments specific to that particular backend.
.. code-block:: python3
>>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}
kwargs: keyword arguments for the optimiser.
Returns:
``Dict[Text, List[float]]``:
Returns a dictionary for data specific to each analysis. keywords will be analysis names
and the items are data.
"""
statistical_model_options = statistical_model_options or {}
data = {}
for statistical_model in self:
current_kwargs = {}
current_kwargs.update(
statistical_model_options.get(str(statistical_model.backend_type), {})
)
data.update(
{
statistical_model.analysis: statistical_model.generate_asimov_data(
expected=expected,
test_statistic=test_statistic,
**current_kwargs,
**kwargs,
)
}
)
return data
[docs]
def asimov_likelihood(
self,
poi_test: float = 1.0,
expected: ExpectationType = ExpectationType.observed,
return_nll: bool = True,
test_statistics: str = "qtilde",
statistical_model_options: Optional[Dict[str, Dict]] = None,
**kwargs,
) -> float:
r"""
Compute likelihood of the statistical model stack generated with the Asimov data.
Args:
poi_test (``float``, default ``1.0``): parameter of interest, :math:`\mu`.
expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
p-values to be computed.
* :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
prescriotion which means that the experimental data will be assumed to be the truth
(default).
* :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
post-fit prescriotion which means that the experimental data will be assumed to be
the truth.
* :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
prescription which means that the SM will be assumed to be the truth.
return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
if ``False`` returns likelihood value.
test_statistics (``Text``, default ``"qtilde"``): test statistics.
* ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
:math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
(:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).
.. warning::
Note that this assumes that :math:`\hat\mu\geq0`, hence ``allow_negative_signal``
assumed to be ``False``. If this function has been executed by user, ``spey``
assumes that this is taken care of throughout the external code consistently.
Whilst computing p-values or upper limit on :math:`\mu` through ``spey`` this
is taken care of automatically in the backend.
* ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
* ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).
statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
options. The dictionary key needs to be the backend name and the item needs to be the dictionary
holding the keyword arguments specific to that particular backend.
.. code-block:: python3
>>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}
kwargs: keyword arguments for the optimiser.
Returns:
``float``:
likelihood computed for asimov data
"""
return self.likelihood(
poi_test=poi_test,
expected=expected,
return_nll=return_nll,
data=self.generate_asimov_data(
expected=expected,
test_statistic=test_statistics,
statistical_model_options=statistical_model_options,
**kwargs,
),
statistical_model_options=statistical_model_options,
**kwargs,
)
[docs]
def maximize_likelihood(
self,
return_nll: bool = True,
expected: ExpectationType = ExpectationType.observed,
allow_negative_signal: bool = True,
data: Optional[Dict[str, List[float]]] = None,
initial_muhat_value: Optional[float] = None,
par_bounds: Optional[List[Tuple[float, float]]] = None,
statistical_model_options: Optional[Dict[str, Dict]] = None,
**optimiser_options,
) -> Tuple[float, float]:
r"""
Find the maximum of the likelihood.
Args:
return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
if ``False`` returns likelihood value.
expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
p-values to be computed.
* :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
prescriotion which means that the experimental data will be assumed to be the truth
(default).
* :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
post-fit prescriotion which means that the experimental data will be assumed to be
the truth.
* :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
prescription which means that the SM will be assumed to be the truth.
allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu`
value will be allowed to be negative.
data (``Dict[Text, List[float]]``, default ``None``): Input data to be used for fit. It needs
to be given as a dictionary where the key argument is the name of the analysis and item is
the statistical model specific data.
initial_muhat_value (``float``, default ``None``): Initialisation for the :math:`\hat\mu` for
the optimiser. If ``None`` the initial value will be estimated by weighted combination of
:math:`\hat\mu_i` where :math:`i` stands for the statistical models within the stack.
.. math::
\mu_{\rm init} = \frac{1}{\sum_i \sigma_{\hat\mu, i}^2} \sum_i \frac{\hat\mu_i}{\sigma_{\hat\mu, i}^2}
where the value of :math:`\sigma_{\hat\mu}` has been estimated by
:func:`~spey.base.hypotest_base.HypothesisTestingBase.sigma_mu` function.
par_bounds (``List[Tuple[float, float]]``, default ``None``): parameter bounds for
the optimiser.
statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
options. The dictionary key needs to be the backend name and the item needs to be the dictionary
holding the keyword arguments specific to that particular backend.
.. code-block:: python3
>>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}
kwargs: keyword arguments for the optimiser.
Returns:
``Tuple[float, float]``:
:math:`\hat\mu` value and maximum value of the likelihood.
"""
statistical_model_options = statistical_model_options or {}
data = data or {}
# muhat initial value estimation in gaussian limit
mu_init = initial_muhat_value or 0.0
if initial_muhat_value is None:
try:
_mu, _sigma_mu = np.zeros(len(self)), np.ones(len(self))
for idx, stat_model in enumerate(self):
current_kwargs = {}
current_kwargs.update(
statistical_model_options.get(str(stat_model.backend_type), {})
)
_mu[idx] = stat_model.maximize_likelihood(
expected=expected, **current_kwargs, **optimiser_options
)[0]
_sigma_mu[idx] = stat_model.sigma_mu(
poi_test=_mu[idx],
expected=expected,
**current_kwargs,
**optimiser_options,
)
norm = np.sum(np.power(_sigma_mu, -2))
mu_init = np.true_divide(1.0, norm) * np.sum(
np.true_divide(_mu, np.square(_sigma_mu))
)
except Exception as err:
log.debug(str(err))
mu_init = 0.0
config: ModelConfig = ModelConfig(
poi_index=0,
minimum_poi=self.minimum_poi,
suggested_init=[float(mu_init)],
suggested_bounds=(
par_bounds
or [
(
self.minimum_poi if allow_negative_signal else 0.0,
max(10.0, mu_init),
)
]
),
)
def twice_nll(poi_test: Union[float, np.ndarray]) -> float:
"""Function to compute twice negative log-likelihood for a given poi test"""
return 2.0 * self.likelihood(
poi_test if isinstance(poi_test, float) else poi_test[0],
expected=expected,
return_nll=True,
data=data,
statistical_model_options=statistical_model_options,
**optimiser_options,
)
twice_nll, fit_params = fit(
func=twice_nll,
model_configuration=config,
**optimiser_options,
)
return fit_params[0], twice_nll / 2.0 if return_nll else np.exp(-twice_nll / 2.0)
[docs]
def maximize_asimov_likelihood(
self,
return_nll: bool = True,
expected: ExpectationType = ExpectationType.observed,
test_statistics: str = "qtilde",
initial_muhat_value: Optional[float] = None,
par_bounds: Optional[List[Tuple[float, float]]] = None,
statistical_model_options: Optional[Dict[str, Dict]] = None,
**optimiser_options,
) -> Tuple[float, float]:
r"""
Find the maximum of the likelihood which computed with respect to Asimov data.
Args:
return_nll (``bool``, default ``True``): If ``True``, returns negative log-likelihood value.
if ``False`` returns likelihood value.
expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and
p-values to be computed.
* :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit
prescriotion which means that the experimental data will be assumed to be the truth
(default).
* :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via
post-fit prescriotion which means that the experimental data will be assumed to be
the truth.
* :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit
prescription which means that the SM will be assumed to be the truth.
test_statistics (``Text``, default ``"qtilde"``): test statistic.
* ``'qtilde'``: (default) performs the calculation using the alternative test statistic,
:math:`\tilde{q}_{\mu}`, see eq. (62) of :xref:`1007.1727`
(:func:`~spey.hypothesis_testing.test_statistics.qmu_tilde`).
.. warning::
Note that this assumes that :math:`\hat\mu\geq0`, hence :obj:`allow_negative_signal`
assumed to be :obj:`False`. If this function has been executed by user, :obj:`spey`
assumes that this is taken care of throughout the external code consistently.
Whilst computing p-values or upper limit on :math:`\mu` through :obj:`spey` this
is taken care of automatically in the backend.
* ``'q'``: performs the calculation using the test statistic :math:`q_{\mu}`, see
eq. (54) of :xref:`1007.1727` (:func:`~spey.hypothesis_testing.test_statistics.qmu`).
* ``'q0'``: performs the calculation using the discovery test statistic, see eq. (47)
of :xref:`1007.1727` :math:`q_{0}` (:func:`~spey.hypothesis_testing.test_statistics.q0`).
initial_muhat_value (``float``, default ``None``): Initial value for :math:`\hat\mu`.
par_bounds (``List[Tuple[float, float]]``, default ``None``): parameter bounds for
the optimiser.
statistical_model_options (``Dict[Text, Dict]``, default ``None``): backend specific
options. The dictionary key needs to be the backend name and the item needs to be the dictionary
holding the keyword arguments specific to that particular backend.
.. code-block:: python3
>>> statistical_model_options = {"default.uncorrelated_background" : {"init_pars" : [1., 3., 4.]}}
kwargs: keyword arguments for the optimiser.
Returns:
``Tuple[float, float]``:
:math:`\hat\mu` value and maximum value of the likelihood.
"""
allow_negative_signal: bool = True if test_statistics in ["q", "qmu"] else False
config: ModelConfig = ModelConfig(
poi_index=0,
minimum_poi=self.minimum_poi,
suggested_init=[initial_muhat_value or 0.0],
suggested_bounds=(
par_bounds or [(self.minimum_poi if allow_negative_signal else 0.0, 10.0)]
),
)
statistical_model_options = statistical_model_options or {}
data = self.generate_asimov_data(
expected=expected,
test_statistic=test_statistics,
statistical_model_options=statistical_model_options,
**optimiser_options,
)
def twice_nll(poi_test: Union[float, np.ndarray]) -> float:
"""Function to compute twice negative log-likelihood for a given poi test"""
return 2.0 * self.likelihood(
poi_test if isinstance(poi_test, float) else poi_test[0],
expected=expected,
data=data,
statistical_model_options=statistical_model_options,
**optimiser_options,
)
twice_nll, fit_params = fit(
func=twice_nll,
model_configuration=config,
**optimiser_options,
)
return fit_params[0], twice_nll / 2.0 if return_nll else np.exp(-twice_nll / 2.0)
def __matmul__(self, other: StatisticalModel):
new_model = UnCorrStatisticsCombiner(*self._statistical_models)
if isinstance(other, StatisticalModel):
new_model.append(other)
elif isinstance(other, UnCorrStatisticsCombiner):
for model in other:
new_model.append(model)
else:
raise ValueError(
f"Can not combine type<{type(other)}> with UnCorrStatisticsCombiner"
)
return new_model