Source code for mlscorecheck.aggregated._fold

"""
This module implements an abstraction for a fold

The abstraction is created to facilitate the testing and evaluation
of the aggregated checking of scores by enabling the easy creation,
sampling and calculation of scores, and the assembly of
the linear programming problem
"""

import pulp as pl

from ..core import init_random_state, round_scores
from ..scores import calculate_scores_for_lp

from ._utils import random_identifier, aggregated_scores

__all__ = ["Fold"]



[docs]
class Fold:
    """
    Abstract representation of a fold
    """

    def __init__(self, p: int, n: int, identifier: str = None):
        """
        Constructor of a fold

        Args:
            p (int): the number of positives
            n (int): the number of negatives
            identifier (None|str): identifier of the fold, randomly generated if None
        """
        self.p = p
        self.n = n
        self.identifier = random_identifier(5) if identifier is None else identifier

        self.tp = None
        self.tn = None
        self.scores = None

        self.variable_names = {
            "tp": f"tp_{self.identifier}".replace("-", "_"),
            "tn": f"tn_{self.identifier}".replace("-", "_"),
        }


[docs]
    def to_dict(self) -> dict:
        """
        Dictionary representation of the fold

        Returns:
            dict: the dictionary representation
        """
        return {"p": self.p, "n": self.n, "identifier": self.identifier}



[docs]
    def sample_figures(self, random_state=None):
        """
        Draw random ``tp`` and ``tn`` figures for the fold

        Args:
            random_state (None|int|np.random.RandomState): the random state/seed to use

        Returns:
            Fold: the self object after sampling
        """
        rng = init_random_state(random_state)

        # presumably ``rng`` behaves like a numpy RandomState, whose randint
        # excludes the upper bound - the "+ 1" then makes ``p`` and ``n``
        # themselves attainable; ``tp`` is drawn before ``tn``
        tp_upper = self.p + 1
        self.tp = rng.randint(tp_upper)

        tn_upper = self.n + 1
        self.tn = rng.randint(tn_upper)

        return self



[docs]
    def calculate_scores(
        self, rounding_decimals: int = None, score_subset: list = None
    ) -> dict:
        """
        Compute the scores of the fold from its ``p``, ``n``, ``tp`` and ``tn`` figures

        The unrounded scores are always stored in ``self.scores``; rounding
        affects only the returned value.

        Args:
            rounding_decimals (int|None): the number of decimals to round to,
                                            no rounding is applied if None
            score_subset (list): the subset of scores to calculate, defaults
                                    to ``aggregated_scores`` if None

        Returns:
            dict: the scores
        """
        subset = aggregated_scores if score_subset is None else score_subset

        # ``tp`` and ``tn`` are whatever the fold currently holds: sampled
        # figures, or the LP variables created by ``init_lp``
        figures = {"p": self.p, "n": self.n, "tp": self.tp, "tn": self.tn}

        self.scores = calculate_scores_for_lp(figures, score_subset=subset)

        if rounding_decimals is None:
            return self.scores

        return round_scores(self.scores, rounding_decimals)



[docs]
    def set_initial_values(self, scores):
        """
        Sets the initial values for the tp and tn variables

        Args:
            scores (dict): the dictionary of scores
        """
        if "acc" in scores:
            tp_init = scores["acc"] * self.p
            tn_init = scores["acc"] * self.n
        if "bacc" in scores:
            tp_init = scores["bacc"] * self.p
            tn_init = scores["bacc"] * self.n

        if "sens" in scores:
            tp_init = scores["sens"] * self.p
        if "spec" in scores:
            tn_init = scores["spec"] * self.n

        self.tp.setInitialValue(int(tp_init))
        self.tn.setInitialValue(int(tn_init))



[docs]
    def init_lp(self, scores: dict = None):
        """
        Prepare the fold for linear programming

        Replaces ``self.tp`` and ``self.tn`` by integer LP variables bounded
        by ``[0, p]`` and ``[0, n]``, optionally seeds them with initial
        values derived from ``scores``, and recalculates ``self.scores`` in
        terms of the new variables. The method returns nothing, the fold is
        updated in place.

        Args:
            scores (dict|None): the score values to be used to set initial
                                values; also restricts the scores calculated
                                to those present in both ``scores`` and
                                ``aggregated_scores``
        """
        tp_name = self.variable_names["tp"]
        tn_name = self.variable_names["tn"]

        self.tp = pl.LpVariable(tp_name, 0, self.p, pl.LpInteger)
        self.tn = pl.LpVariable(tn_name, 0, self.n, pl.LpInteger)

        if scores is None:
            # nothing to seed from, all the aggregated scores are calculated
            score_subset = aggregated_scores
        else:
            self.set_initial_values(scores)
            score_subset = list(set(scores.keys()) & set(aggregated_scores))

        self.calculate_scores(score_subset=score_subset)



[docs]
    def populate(self, lp_problem: pl.LpProblem) -> pl.LpProblem:
        """
        Populate the fold with the ``tp`` and ``tn`` values from the linear program

        Args:
            lp_problem (pl.LpProblem): the linear programming problem

        Returns:
            obj: the self object populated with the ``tp`` and ``tn`` scores
        """
        for variable in lp_problem.variables():
            if variable.name == self.variable_names["tp"]:
                self.tp = variable.varValue
            if variable.name == self.variable_names["tn"]:
                self.tn = variable.varValue

        return self