Source code for mlscorecheck.check.bundles.ehg._tpehg

"""
Test functions for the EHG problem
"""

from ....core import NUMERICAL_TOLERANCE
from ....experiments import get_experiment
from ...binary import check_1_dataset_unknown_folds_mos

__all__ = ['check_tpehg']


[docs]
def check_tpehg(scores: dict,
                eps,
                n_folds: int,
                n_repeats: int,
                *,
                score_bounds: dict = None,
                solver_name: str = None,
                timeout: int = None,
                verbosity: int = 1,
                numerical_tolerance: float = NUMERICAL_TOLERANCE) -> dict:
    """
    Checks the cross-validated TPEHG scores

    Args:
        scores (dict(str,float)): the dictionary of scores (supports only 'acc', 'sens', 'spec',
                                    'bacc'). Full names in camel case, like
                                'positive_predictive_value', synonyms, like 'true_positive_rate'
                                or 'tpr' instead of 'sens' and complements, like
                                'false_positive_rate' for (1 - 'spec') can also be used.
        eps (float|dict(str,float)): the numerical uncertainties
        n_folds (int): the number of folds
        n_repeats (int): the number of repetitions
        score_bounds (dict(str,tuple(float,float))): the potential bounds on the scores
                                                            of the folds
        solver_name (None|str): the solver to use
        timeout (None|int): the timeout for the linear programming solver in seconds
        verbosity (int): the verbosity level of the pulp linear programming solver
                            0: silent, non-zero: verbose.
        numerical_tolerance (float): in practice, beyond the numerical uncertainty of
                                    the scores, some further tolerance is applied. This is
                                    orders of magnitude smaller than the uncertainty of the
                                    scores. It does ensure that the specificity of the test
                                    is 1, it might slightly decrease the sensitivity.

    Returns:
        dict: A dictionary containing the results of the consistency check. The dictionary
        includes the following keys:

            - ``'inconsistency'``:
                A boolean flag indicating whether the set of feasible true
                positive (tp) and true negative (tn) pairs is empty. If True,
                it indicates that the provided scores are not consistent with the experiment.
            - ``'details'``:
                A list of dictionaries containing the details of the consistency tests. Each
                entry contains the specification of the folds being tested and the
                outcome of the ``check_1_dataset_known_folds_mos`` function.

    Examples:
        >>> from mlscorecheck.check.bundles.ehg import check_tpehg
        >>> # the 5-fold cross-validation scores reported in the paper
        >>> scores = {'acc': 0.9447, 'sens': 0.9139, 'spec': 0.9733}
        >>> eps = 0.0001
        >>> results = check_tpehg(scores=scores,
                                    eps=eps,
                                    n_folds=5,
                                    n_repeats=1)
        >>> results['inconsistency']
        # True
    """
    evaluation = {'dataset': get_experiment('ehg.tpehg'),
                    'folding': {'n_folds': n_folds, 'n_repeats': n_repeats}}

    return check_1_dataset_unknown_folds_mos(scores=scores,
                                                    eps=eps,
                                                    dataset=evaluation['dataset'],
                                                    folding=evaluation['folding'],
                                                    fold_score_bounds=score_bounds,
                                                    solver_name=solver_name,
                                                    timeout=timeout,
                                                    verbosity=verbosity,
                                                    numerical_tolerance=numerical_tolerance)