# Source code for mixmo.core.metrics_ensemble

"""
Ensembling metrics
Inspired by
* https://github.com/kbogas/EnsembleDiversityTests/blob/master/EnsembleDiversityTests.py
* https://github.com/scikit-learn-contrib/DESlib
"""

import numpy as np
from collections import OrderedDict

from mixmo.utils import misc
from mixmo.utils.logger import get_logger


# Module-level logger for this file, obtained from the project's logger helper
# and requested at level "DEBUG". Used by ratio_errors to report pairs of
# classifiers that share no errors.
LOGGER = get_logger(__name__, level="DEBUG")


class MetricsEnsemble(object):
    """
    Class Wrapper to get Diversity Measures over collection of predictions.
    Args:
        @predictions: list of lists. Each sublist contains the predictions
                      of a classifier
        @names: list of strings. Each string is the name of the classifier.
        @true: list of labels. Each label is the truth label
    Raises:
        AttributeError: if the number of classifiers differs from the number
                        of names, if a classifier does not provide exactly one
                        prediction per true label, or if a classifier's
                        predictions share no label at all with the truth set.
    """

    def __init__(self, predictions, names, true):
        """Validate the inputs and store them on the instance."""
        num_samples = len(true)
        labels = set(true)
        if len(predictions) != len(names):
            # Adjacent literals instead of a backslash continuation inside the
            # string, which used to embed a long run of spaces in the message.
            raise AttributeError(
                'Number of classifiers is different than number of names. '
                '%d != %d.' % (len(predictions), len(names))
            )
        for name, predict in zip(names, predictions):
            if len(predict) != num_samples:
                raise AttributeError(
                    'Number of predictions of classifier %s is different then the number of true labels. %d != %d'
                    % (name, len(predict), num_samples)
                )
            # Only rejects a classifier whose predicted labels have *no*
            # overlap with the truth labels; a partial overlap is accepted.
            if labels.isdisjoint(set(predict)):
                raise AttributeError(
                    'Label in predictions of %s not in truth set.' % (name,)
                )
        self.names = names
        self.true = true
        self.predictions = predictions

    def get_report(self, print_flag=False):
        """Return an ordered dict gathering the diversity and accuracy stats.

        The results of get_diversity_ratioerrors and get_individualaccuracies
        are merged, in that order, through misc.clean_update.

        Args:
            print_flag: forwarded to both underlying methods; when true they
                        also print their results.
        """
        stats = OrderedDict({})
        misc.clean_update(stats, self.get_diversity_ratioerrors(print_flag=print_flag))
        misc.clean_update(stats, self.get_individualaccuracies(print_flag=print_flag))
        return stats

    def get_diversity_ratioerrors(self, print_flag=True):
        """Compute the average pairwise ratio-of-errors diversity.

        Args:
            print_flag: when true, print a header and the averaged value.

        Returns:
            OrderedDict mapping "diversity_ratioerrors" to a dict holding the
            numeric "value" and its formatted "string".
        """
        # self.predictions is [n_classifiers][n_samples]; the pairwise helper
        # expects one column per classifier.
        prediction_matrix = np.transpose(np.array(self.predictions))
        stats = OrderedDict({})
        if print_flag:
            print('### Pairwise Diversity Metrics: ###')
        for diversity_name, diversity_func in [
                ("diversity_ratioerrors", ratio_errors),
                ]:
            diversity_matrix = compute_pairwise_diversity(
                targets=self.true,
                prediction_matrix=prediction_matrix,
                diversity_func=diversity_func)
            # Per-classifier mean over the other classifiers, then averaged
            # over all classifiers.
            value = np.mean(compute_mean_without_diagonal(diversity_matrix).tolist())
            stats[diversity_name] = {
                "value": value,
                "string": f"{value:.5}"}
            if print_flag:
                print(f"Avg. {diversity_name}: {value}")
        return stats

    def get_individualaccuracies(self, print_flag=True):
        """Compute the accuracy of every classifier and their mean.

        Args:
            print_flag: when true, print the accuracies and their mean.

        Returns:
            dict with the key "accuracy_mean" followed by one "accuracy_<i>"
            key per classifier (i being its position in self.predictions).
            Each entry is a dict holding the numeric "value" and its
            percentage-formatted "string".
        """
        meanaccuracy, accuracies = get_accuracy_multi(self.predictions, self.true)

        if print_flag:
            print(f"Accuracies: {accuracies}")
            print(f"Mean accuracy: {meanaccuracy}")
        stats = {"accuracy_mean": meanaccuracy}

        for i, accuracy in enumerate(accuracies):
            stats["accuracy_" + str(i)] = accuracy

        return {
            key: {
                "value": accuracy, "string": f"{accuracy:05.2%}"}
            for key, accuracy in stats.items()
            }

    def help(self):
        """Just a helper function to return the class docstring."""
        return self.__doc__


def get_accuracy_multi(predictions, y_true):
    """Compute the accuracy of each classifier and the mean accuracy.

    Parameters
    ----------
    predictions : sequence of sequences
        One sequence of predicted labels per classifier. Only the first
        ``len(y_true)`` entries of each sequence are compared.

    y_true : sequence
        True label of each sample.

    Returns
    -------
    mean_accuracy : mean of the individual accuracies, computed with np.mean

    accuracies : list holding, for each classifier, the fraction of samples
                 whose prediction equals the true label

    Raises
    ------
    ZeroDivisionError
        If ``y_true`` is empty while at least one classifier is given.
    """
    num_labels = len(y_true)
    # Index into each prediction (rather than zip) so that a classifier with
    # fewer predictions than labels still fails loudly with an IndexError.
    accurate_predictions = [
        sum(1 for j, label in enumerate(y_true) if pred[j] == label)
        for pred in predictions
    ]

    accuracies = [accurate / num_labels for accurate in accurate_predictions]

    return np.mean(accuracies), accuracies



def _process_predictions(y, y_pred1, y_pred2):
    """Pre-process the predictions of a pair of base classifiers for the
    computation of the diversity measures

    Parameters
    ----------
    y : array of shape = [n_samples]:
        class labels of each sample.

    y_pred1 : array of shape = [n_samples]:
              predicted class labels by the classifier 1 for each sample.

    y_pred2 : array of shape = [n_samples]:
              predicted class labels by the classifier 2 for each sample.

    Returns
    -------
    N00 : Fraction of samples that both classifiers predict the wrong label

    N10 : Fraction of samples that only classifier 2 predicts the wrong label

    N01 : Fraction of samples that only classifier 1 predicts the wrong label

    N11 : Fraction of samples that both classifiers predict the correct label

    Raises
    ------
    ValueError
        If the three inputs do not all have the same length.
    ZeroDivisionError
        If the inputs are empty.
    """
    size_y = len(y)
    if size_y != len(y_pred1) or size_y != len(y_pred2):
        raise ValueError('The vector with class labels must have the same size.')

    N00, N10, N01, N11 = 0.0, 0.0, 0.0, 0.0
    for index in range(size_y):
        # Evaluate each classifier's correctness once per sample.
        first_correct = y_pred1[index] == y[index]
        second_correct = y_pred2[index] == y[index]
        if first_correct and second_correct:
            N11 += 1.0
        elif first_correct:
            N10 += 1.0
        elif second_correct:
            N01 += 1.0
        else:
            N00 += 1.0

    # Normalise the counts into fractions of the sample count.
    return N00 / size_y, N10 / size_y, N01 / size_y, N11 / size_y


def ratio_errors(y, y_pred1, y_pred2):
    """Calculates Ratio of errors diversity measure between a pair of
    classifiers. A higher value means that the base classifiers are less likely
    to make the same errors. The ratio must be maximized for a higher diversity

    Parameters
    ----------
    y : array of shape = [n_samples]:
        class labels of each sample.

    y_pred1 : array of shape = [n_samples]:
              predicted class labels by the classifier 1 for each sample.

    y_pred2 : array of shape = [n_samples]:
              predicted class labels by the classifier 2 for each sample.

    Returns
    -------
    ratio : The ratio of errors between the two classifiers, i.e. the
            fraction of samples misclassified by exactly one of them divided
            by the fraction misclassified by both. When the classifiers share
            no error the ratio is undefined; a warning is logged and
            ``2 * (N01 + N10)`` is returned instead.

    References
    ----------
    Aksela, Matti. "Comparison of classifier selection methods for improving
    committee performance."
    Multiple Classifier Systems (2003): 159-159.
    """
    N00, N10, N01, _ = _process_predictions(y, y_pred1, y_pred2)
    different_errors = N01 + N10
    if N00 == 0:
        # No shared errors: dividing by N00 is impossible, use the fallback.
        LOGGER.warning("No shared errors !")
        return 2 * different_errors
    return different_errors / N00


def compute_pairwise_diversity(targets, prediction_matrix, diversity_func):
    """Computes the pairwise diversity matrix.

     Parameters
     ----------
     targets : array of shape = [n_samples]:
        Class labels of each sample in X.

     prediction_matrix : array of shape = [n_samples, n_classifiers]:
        Predicted class labels for each classifier in the pool. Anything
        accepted by ``np.asarray`` (e.g. a list of rows) works.

     diversity_func : Function
        Function used to estimate the pairwise diversity. It is called as
        ``diversity_func(targets, predictions_a, predictions_b)``.

     Returns
     -------
     diversity : array of shape = [n_classifiers, n_classifiers]
        Symmetric matrix whose entry (i, j) is the diversity between
        classifiers i and j. The diagonal is left at zero.

     """
    prediction_matrix = np.asarray(prediction_matrix)
    n_classifiers = prediction_matrix.shape[1]
    diversity_matrix = np.zeros([n_classifiers, n_classifiers])

    for clf_index in range(n_classifiers):
        # Only visit the upper triangle; each value is mirrored below.
        for clf_index2 in range(clf_index + 1, n_classifiers):
            this_diversity = diversity_func(
                targets, prediction_matrix[:, clf_index], prediction_matrix[:, clf_index2]
            )

            diversity_matrix[clf_index, clf_index2] = this_diversity
            diversity_matrix[clf_index2, clf_index] = this_diversity

    return diversity_matrix


def compute_mean_without_diagonal(matrix):
    """Return, for each row, the mean of its off-diagonal entries.

    The row sum is divided by ``n - 1`` where ``n`` is the number of rows,
    which equals the off-diagonal mean as long as the diagonal is zero.

    Parameters
    ----------
    matrix : array of shape = [n, n]
        Square matrix with a zero diagonal, such as the output of
        compute_pairwise_diversity.

    Returns
    -------
    means : array of shape = [n]
        Per-row mean over the other ``n - 1`` columns. With a single row
        there is nothing to average over, so NaN is returned for it.
    """
    matrix = np.asarray(matrix)
    n_rows = matrix.shape[0]
    row_sums = np.sum(matrix, axis=1)
    if n_rows < 2:
        # Avoid the 0/0 division (and its RuntimeWarning); the result is
        # undefined for fewer than two classifiers.
        return np.full(row_sums.shape, np.nan)
    return row_sums / (n_rows - 1)