# Source code for mlscorecheck.check.regression._check_regression

"""
This module implements the testing of regression scores
"""

import numpy as np

from ...individual import Interval
from ...core import NUMERICAL_TOLERANCE, round_scores, safe_eval

# Names exported by a star-import of this module. The module-level
# ``regression_scores`` and ``rules`` objects are not listed here.
__all__ = [
    "check_1_testset_no_kfold",
    "expand_regression_scores",
    "mean_average_error",
    "mean_squared_error",
    "root_mean_squared_error",
    "r_squared",
    "calculate_regression_scores",
    "generate_regression_problem_and_scores",
    "check_relations",
    "score_formulas",
]

# Relations that must hold between scores, read as "score0 <relation> score1".
# The single rule states mae <= rmse; "le" is the only relation that
# check_relations evaluates.
rules = [{"score0": "mae", "score1": "rmse", "relation": "le"}]


def mean_average_error(y_true: np.array, y_pred: np.array) -> float:
    """
    Compute the MAE score, i.e. the mean of the absolute residuals

    Args:
        y_true (np.array): the true labels
        y_pred (np.array): the predicted labels

    Returns:
        float: the MAE score
    """
    residuals = y_true - y_pred
    return np.mean(np.absolute(residuals))


def mean_squared_error(y_true: np.array, y_pred: np.array) -> float:
    """
    Compute the MSE score, i.e. the mean of the squared residuals

    Args:
        y_true (np.array): the true labels
        y_pred (np.array): the predicted labels

    Returns:
        float: the MSE score
    """
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))


def root_mean_squared_error(y_true: np.array, y_pred: np.array) -> float:
    """
    Compute the RMSE score, i.e. the square root of the MSE score

    Args:
        y_true (np.array): the true labels
        y_pred (np.array): the predicted labels

    Returns:
        float: the RMSE score
    """
    # delegate to the MSE implementation so the two scores cannot drift apart
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)


def r_squared(y_true: np.array, y_pred: np.array) -> float:
    """
    Compute the R squared (r2) score: one minus the ratio of the residual
    sum of squares to the total sum of squares of the true labels

    Args:
        y_true (np.array): the true labels
        y_pred (np.array): the predicted labels

    Returns:
        float: the R2 score
    """
    residual_sum_of_squares = np.sum((y_true - y_pred) ** 2)
    # the (population) variance times the sample count is the total sum of squares
    total_sum_of_squares = np.var(y_true) * y_true.shape[0]
    return 1.0 - residual_sum_of_squares / total_sum_of_squares


# Registry mapping each score name to the function computing it from
# (y_true, y_pred); calculate_regression_scores iterates over it.
regression_scores = {
    "mae": mean_average_error,
    "mse": mean_squared_error,
    "rmse": root_mean_squared_error,
    "r2": r_squared,
}


def calculate_regression_scores(
    y_true: np.array, y_pred: np.array, subset=None
) -> dict:
    """
    Evaluate the registered regression scores on a set of predictions

    Args:
        y_true (np.array): the true labels
        y_pred (np.array): the predicted labels
        subset (None|list(str)): the names of the scores to calculate,
                                    None means every registered score

    Returns:
        dict: the calculated scores keyed by score name
    """
    return {
        name: scorer(y_true, y_pred)
        for name, scorer in regression_scores.items()
        if subset is None or name in subset
    }


def generate_regression_problem_and_scores(
    random_state=None, max_n_samples=1000, subset=None, rounding_decimals=None
) -> tuple[float, int, dict]:
    """
    Generate a random regression problem and the corresponding scores

    The true labels are drawn uniformly from [0, 1), the predictions are the
    true labels perturbed by uniform noise from [-0.05, 0.05). All random
    draws come from ``random_state``, so a fixed seed gives a reproducible
    problem.

    Args:
        random_state (None|int|np.random.RandomState): the random state/seed to use
        max_n_samples (int): the exclusive upper bound on the number of
                                samples (the count is drawn from
                                [2, max_n_samples))
        subset (None|list(str)): the list of scores to be used
        rounding_decimals (None|int): the number of decimals passed to
                                        ``round_scores``

    Returns:
        float, int, dict: the variance of the true labels, the number of
        samples and the scores
    """
    if not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    n_samples = random_state.randint(2, max_n_samples)

    # draw from the supplied generator rather than the global numpy state,
    # otherwise the seed would only fix n_samples and not the data itself
    y_true = random_state.random_sample(n_samples)
    noise = (random_state.random_sample(n_samples) - 0.5) / 10
    y_pred = y_true + noise

    scores = calculate_regression_scores(y_true, y_pred, subset)
    scores = round_scores(scores, rounding_decimals)

    return np.var(y_true), n_samples, scores


# Formulas for deriving a missing score: the outer key is the score to derive,
# the inner key is the score it is derived from, and the value is the
# expression (over that score and the variance ``var``) that
# expand_regression_scores passes to safe_eval. The inner entries are tried
# in order and the first one whose source score is available is used.
score_formulas = {
    "mse": {"rmse": "rmse**2", "r2": "((1 - r2) * (var))"},
    "rmse": {"mse": "mse ** 0.5", "r2": "((1 - r2) * (var)) ** 0.5"},
    "r2": {"mse": "(1 - mse / var)", "rmse": "(1 - rmse**2 / var)"},
}


def expand_regression_scores(
    var: float, n_samples: int, scores: dict, eps, numerical_tolerance: float
) -> dict:
    """
    Generate scores from the ones available and expand the scores to intervals given the
    numerical uncertainty.

    Each reported score becomes the interval
    ``[value - eps - numerical_tolerance, value + eps + numerical_tolerance]``.
    Scores listed in ``score_formulas`` that were not reported are then derived
    from the reported ones (never from other derived ones).

    Args:
        var (float): the variance of the evaluation set
        n_samples (int): the number of samples in the evaluation set
        scores (dict(str,float)): the scores to check ('mae', 'rmse', 'mse', 'r2')
        eps (float,dict(str,float)): the numerical uncertainty of the scores,
                                        either one value shared by all scores
                                        or one value per score name
        numerical_tolerance (float): the numerical tolerance of the test

    Returns:
        dict: the extended set of score intervals

    Raises:
        KeyError: if ``eps`` is a dict that lacks an entry for a reported score
    """
    # normalize eps to the per-score form so both documented shapes are handled
    eps_by_score = eps if isinstance(eps, dict) else dict.fromkeys(scores, eps)

    intervals = {
        key: Interval(
            value - eps_by_score[key] - numerical_tolerance,
            value + eps_by_score[key] + numerical_tolerance,
        )
        for key, value in scores.items()
    }

    derived = {}
    for target, formulas in score_formulas.items():
        if target in intervals:
            continue
        for source, formula in formulas.items():
            if source in intervals:
                # a fresh namespace per call, in case safe_eval modifies it
                derived[target] = safe_eval(
                    formula, intervals | {"var": var, "n_samples": n_samples}
                )
                # the first applicable formula wins
                break

    return intervals | derived


def check_relations(scores: dict) -> dict:
    """
    Check the relations of the scores

    Every rule in ``rules`` whose two scores are both present is evaluated;
    rules referring to a missing score are skipped. The relation ``'le'``
    holds when the lower bound of ``score0`` does not exceed the upper bound
    of ``score1``, i.e. when the two intervals do not rule the relation out.

    Args:
        scores (dict(str,Interval)): the score and figure intervals

    Returns:
        dict: a summary of the analysis, with the following entries:

            - ``'details'`` (list(dict)): one entry per evaluated rule, holding
              the rule itself (``'score0'``, ``'score1'``, ``'relation'``) and
              ``'value'`` (bool), whether the relation can hold
            - ``'inconsistency'`` (bool): whether an inconsistency has been identified

    Raises:
        ValueError: if a rule specifies a relation other than ``'le'``
    """
    details = []

    for rule in rules:
        if rule["score0"] not in scores or rule["score1"] not in scores:
            continue

        score0 = scores[rule["score0"]]
        score1 = scores[rule["score1"]]

        if rule["relation"] == "le":
            value = score0.lower_bound <= score1.upper_bound
        else:
            # fail loudly instead of using an unbound or stale ``value``
            raise ValueError(f"unsupported relation: {rule['relation']!r}")

        details.append(rule | {"value": value})

    return {
        "details": details,
        "inconsistency": any(not detail["value"] for detail in details),
    }



# [docs]
def check_1_testset_no_kfold(
    var: float,
    n_samples: int,
    scores: dict,
    eps,
    numerical_tolerance: float = NUMERICAL_TOLERANCE,
) -> dict:
    """
    Test the consistency of regression scores reported for one single test
    set, evaluated without k-folding

    The scores are first turned into intervals (and the derivable missing
    ones are added) by ``expand_regression_scores``, then the known relations
    between the scores are verified by ``check_relations``.

    Args:
        var (float): the variance of the evaluation set
        n_samples (int): the number of samples in the evaluation set
        scores (dict(str,float)): the scores to check ('mae', 'rmse', 'mse', 'r2')
        eps (float|dict(str,float)): the numerical uncertainty of the scores; it is
                                        forwarded as is to ``expand_regression_scores``,
                                        which determines the accepted forms
        numerical_tolerance (float): the numerical tolerance of the test

    Returns:
        dict: a summary of the analysis, with the following entries:

            - ``'inconsistency'`` (bool): whether an inconsistency has been identified
            - ``'details'`` (list(dict)): one entry per checked relation with the
              keys ``'score0'``, ``'score1'``, ``'relation'`` and ``'value'``

    Examples:
        >>> from mlscorecheck.check.regression import check_1_testset_no_kfold
        >>> var = 0.08316192579267838
        >>> n_samples = 100
        >>> scores = {'mae': 0.0254, 'r2': 0.9897}
        >>> result = check_1_testset_no_kfold(var=var,
        ...                                     n_samples=n_samples,
        ...                                     scores=scores,
        ...                                     eps=1e-4)
        >>> result['inconsistency']
        False

        >>> scores['mae'] = 0.03
        >>> result = check_1_testset_no_kfold(var=var,
        ...                                     n_samples=n_samples,
        ...                                     scores=scores,
        ...                                     eps=1e-4)
        >>> result['inconsistency']
        True
    """
    return check_relations(
        expand_regression_scores(
            var=var,
            n_samples=n_samples,
            scores=scores,
            eps=eps,
            numerical_tolerance=numerical_tolerance,
        )
    )