Source code for spey_pyhf.data

import copy
import json
import os
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Iterator, List, Optional, Tuple, Union

import numpy as np
from spey import ExpectationType
from spey.base import ModelConfig
from spey.system.exceptions import InvalidInput

from . import WorkspaceInterpreter, manager


[docs] class Base(ABC): """Base class for pyhf data input""" def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. Returns: ``Tuple``: workspace, model and data """ return None, None, None
[docs] @abstractmethod def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """
@property @abstractmethod def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield."""
[docs] @dataclass class SimpleModelData(Base): """ Dataclass for simple statistical model Args: signal_yields (``List[float]``): signal yields background_yields (``List[float]``): background yields data (``List[float]``): observations absolute_uncertainties (``List[float]``): absolute uncertainties on the background """ signal_yields: List[float] background_yields: List[float] data: List[float] absolute_uncertainties: List[float] def __post_init__(self): assert ( len(self.signal_yields) == len(self.background_yields) == len(self.data) == len(self.absolute_uncertainties) ), "Lenght of all input must be the same." def __call__(self, expected: ExpectationType = ExpectationType.observed) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. Returns: ``Tuple``: workspace, model and data """ model = manager.pyhf.simplemodels.uncorrelated_background( self.signal_yields, self.background_yields, self.absolute_uncertainties ) data = ( self.background_yields if expected == ExpectationType.apriori else self.data ) + model.config.auxdata return None, model, data @property def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield.""" return any(x > 0 for x in self.signal_yields)
[docs] def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """ signal = np.array(self.signal_yields) nb = np.array(self.background_yields) minimum_poi = ( -np.min(np.true_divide(nb[signal != 0.0], signal[signal != 0.0])).astype( np.float32 ) if len(signal[signal != 0.0]) > 0 else -np.inf ) _, model, _ = self() bounds = model.config.suggested_bounds() bounds[model.config.poi_index] = ( max(minimum_poi, -10.0) if allow_negative_signal else 0.0, bounds[model.config.poi_index][1] if not poi_upper_bound else poi_upper_bound, ) return ModelConfig( poi_index=model.config.poi_index, minimum_poi=minimum_poi, suggested_init=model.config.suggested_init(), suggested_bounds=bounds, parameter_names=model.config.par_names, )
[docs] @dataclass class FullStatisticalModelData(Base): """ Data container for the full statistical model. Args: signal_patch (``List[Dict]``): Patch data for signal model. please see `this link <https://pyhf.readthedocs.io/en/v0.7.0/likelihood.html>`_ for details on the structure of the input. background_only_model (``Dict`` or ``Text``): This input expects background only data that describes the full statistical model for the background. It also accepts ``str`` input which indicates the full path to the background only ``JSON`` file. """ signal_patch: List[Dict] background_only_model: Union[Dict, str] def __post_init__(self): if isinstance(self.background_only_model, str): if not os.path.isfile(self.background_only_model): raise FileNotFoundError(f"Can not find {self.background_only_model}") with open(self.background_only_model, encoding="uft-8") as f: self.background_only_model = json.load(f) interpreter = WorkspaceInterpreter(self.background_only_model) interpreter.add_patch(self.signal_patch) # set data as expected background events self.expected_background_yields = interpreter.expected_background_yields self.workspace = manager.pyhf.Workspace(self.background_only_model) self._model = None # Initialise config model = self()[1] min_ratio = [] for idc, channel in enumerate(self.background_only_model.get("channels", [])): current_signal = [] for sigch in self.signal_patch: if idc == int(sigch["path"].split("/")[2]): current_signal = np.array( sigch.get("value", {}).get("data", []), dtype=np.float32 ) break if len(current_signal) == 0: continue current_bkg = [] for ch in channel["samples"]: if len(current_bkg) == 0: current_bkg = np.zeros(shape=(len(ch["data"]),), dtype=np.float32) current_bkg += np.array(ch["data"], dtype=np.float32) min_ratio.append( np.min( np.true_divide( current_bkg[current_signal != 0.0], current_signal[current_signal != 0.0], ) ) if np.any(current_signal != 0.0) else np.inf ) self._minimum_poi = ( -np.min(min_ratio).astype(np.float32) if len(min_ratio) > 0 else -np.inf ) self._config = { "poi_index": model.config.poi_index, "minimum_poi": self._minimum_poi, "suggested_init": model.config.suggested_init(), "suggested_bounds": model.config.suggested_bounds(), "parameter_names": model.config.par_names, "suggested_fixed": model.config.suggested_fixed(), } @property def channels(self) -> Iterator[str]: """Return channel names""" return (ch["name"] for ch in self.background_only_model["channels"]) @property def channel_properties(self) -> Iterator[Tuple[int, str, int]]: """Returns an iterator for channel index, name and number of bins""" for idx, channel in enumerate(self.channels): yield idx, channel, self.workspace.channel_nbins[channel] def __call__( self, expected: ExpectationType = ExpectationType.observed, include_aux: bool = True, ) -> Tuple: """ Retreive pyhf workspace Args: expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and p-values to be computed. * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth (default). * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via post-fit prescriotion which means that the experimental data will be assumed to be the truth. * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit prescription which means that the SM will be assumed to be the truth. include_aux (`bool`, default `True`): Include auxiliary data. Returns: ``Tuple``: workspace, model and data """ if self._model is None: self._model = self.workspace.model( patches=[self.signal_patch], modifier_settings={ "normsys": {"interpcode": "code4"}, "histosys": {"interpcode": "code4p"}, }, ) if expected == ExpectationType.apriori: try: data = sum( ( self.expected_background_yields[ch] for ch in self._model.config.channels ), [], ) except KeyError as err: # provide a useful error message to guide the user to the solution missing_channels = [ ch for ch in self._model.config.channels if ch not in self.expected_background_yields ] raise InvalidInput( "Unable to construct expected data. " + (len(missing_channels) > 0) * ( "\nThis is likely due to missing channels in the signal patch. " + "The missing channels are: " + ", ".join(missing_channels) + "\nPlease provide appropriate action for the missing channels to continue." ) ) from err if include_aux: data += self._model.config.auxdata else: data = self.workspace.data(self._model, include_auxdata=include_aux) return self.workspace, self._model, data
[docs] def config( self, allow_negative_signal: bool = True, poi_upper_bound: Optional[float] = None ) -> ModelConfig: r""" Model configuration. Args: allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` value will be allowed to be negative. poi_upper_bound (``float``, default ``40.0``): upper bound for parameter of interest, :math:`\mu`. Returns: ~spey.base.ModelConfig: Model configuration. Information regarding the position of POI in parameter list, suggested input and bounds. """ bounds = copy.deepcopy(self._config["suggested_bounds"]) bounds[self._config["poi_index"]] = ( max(self._minimum_poi, -10.0) if allow_negative_signal else 0.0, bounds[self._config["poi_index"]][1] if not poi_upper_bound else poi_upper_bound, ) return ModelConfig( poi_index=self._config["poi_index"], minimum_poi=self._minimum_poi, suggested_init=self._config["suggested_init"], suggested_bounds=bounds, parameter_names=self._config["parameter_names"], suggested_fixed=self._config["suggested_fixed"], )
@property def isAlive(self) -> bool: """Returns True if at least one bin has non-zero signal yield.""" for channel in ( ch for ch in self.signal_patch if ch.get("value", None) is not None ): if any(nsig > 0.0 for nsig in channel["value"].get("data", [])): return True return False