Source code for Orange.evaluation.performance_curves
import numpy as np
[docs]
class Curves:
    # names of scores are standard acronyms, pylint: disable=invalid-name
    """
    Computation of performance curves (ca, f1, precision, recall and the rest
    of the zoo) from test results.

    The class works with binary classes. Attribute `probs` contains ordered
    probabilities and all curves represent performance statistics if an
    instance is classified as positive if it equals or exceeds the threshold
    in `probs`, that is, `sensitivity()[i]` is the sensitivity of the
    classifier that classifies an instance as positive if the probability of
    being positive is at least `probs[i]`.

    Every curve has `tot + 1` points: one for each (sorted) predicted
    probability plus a final point for the threshold 1 that is appended to
    `probs`.

    Class can be constructed by giving `ytrue` and `probs`, or from test
    results (see :obj:`Curves.from_results`). The latter removes instances
    with missing class values or predicted probabilities.

    The class treats all results as obtained from a single run instead of
    computing separate curves and fancy averaging.

    Arguments:
        ytrue (array-like): true classes (booleans or 0/1), aligned with
            `probs`
        probs (array-like): vector of predicted probabilities of the
            positive class

    Attributes:
        probs (np.ndarray): ordered vector of predicted probabilities,
            with 1 appended as the last threshold
        ytrue (np.ndarray): corresponding true classes, in the same order
        tot (int): total number of data instances
        p (int): number of real positive instances
        n (int): number of real negative instances
        tp (np.ndarray): number of true positives (property computed from
            `fn`)
        fp (np.ndarray): number of false positives (property computed from
            `tn`, which is in turn computed from `fn`)
        tn (np.ndarray): number of true negatives (property computed from
            `fn`)
        fn (np.ndarray): number of false negatives (precomputed, not a
            property)
    """

    def __init__(self, ytrue, probs):
        # Coerce to arrays so that plain sequences can be fancy-indexed
        # below; ndarray inputs pass through unchanged.
        ytrue = np.asarray(ytrue)
        probs = np.asarray(probs)

        sortind = np.argsort(probs)
        # The extra threshold 1 is the point at which (unless a probability
        # of 1 appears in the data) nothing is classified as positive.
        self.probs = np.hstack((probs[sortind], [1]))
        self.ytrue = ytrue[sortind]
        # fn[i] = number of positives among the i instances with the lowest
        # probabilities, i.e. those falling below threshold probs[i].
        self.fn = np.hstack(([0], np.cumsum(self.ytrue)))
        self.tot = len(probs)
        self.p = self.fn[-1]
        self.n = self.tot - self.p

    @classmethod
    def from_results(cls, results, target_class=None, model_index=None):
        """
        Construct an instance of `Curves` from test results.

        Instances whose actual class or predicted probability is NaN are
        removed before the curves are computed.

        Args:
            results (:obj:`Orange.evaluation.testing.Results`): test results
            target_class (int): target class index; if the class is binary,
                this defaults to `1`, otherwise it must be given
            model_index (int): model index; if there is only one model, this
                argument can be omitted

        Returns:
            curves (:obj:`Curves`)

        Raises:
            ValueError: if `model_index` is omitted while there are multiple
                models, or `target_class` is omitted while the class is not
                binary
        """
        if model_index is None:
            if results.probabilities.shape[0] != 1:
                raise ValueError("Argument 'model_index' is required when "
                                 "there are multiple models")
            model_index = 0
        if target_class is None:
            if results.probabilities.shape[2] != 2:
                raise ValueError("Argument 'target_class' is required when the "
                                 "class is not binary")
            target_class = 1

        actual = results.actual
        probs = results.probabilities[model_index, :, target_class]
        # Logical OR of the two masks: drop a row if either value is missing.
        nans = np.isnan(actual) | np.isnan(probs)
        if nans.any():
            actual = actual[~nans]
            probs = probs[~nans]
        return cls(actual == target_class, probs)

    @property
    def tn(self):
        """Number of true negatives at each threshold."""
        # i instances fall below threshold i; those that are not false
        # negatives are true negatives.
        return np.arange(self.tot + 1) - self.fn

    @property
    def tp(self):
        """Number of true positives at each threshold."""
        return self.p - self.fn

    @property
    def fp(self):
        """Number of false positives at each threshold."""
        return self.n - self.tn

    def ca(self):
        """Classification accuracy curve"""
        return (self.tp + self.tn) / self.tot

    def f1(self):
        """F1 curve"""
        return 2 * self.tp / (2 * self.tp + self.fp + self.fn)

    def sensitivity(self):
        """Sensitivity curve"""
        return self.tp / self.p

    def specificity(self):
        """Specificity curve"""
        return self.tn / self.n

    def precision(self):
        """
        Precision curve

        The last element represents precision at threshold 1. Unless such
        a probability appears in the data, the precision at this point is
        undefined. To avoid this, we copy the previous value to the last.

        If there are no data instances at all, there is no previous value
        to copy and the single point of the curve is NaN.
        """
        # tp + fp at threshold i is the number of instances classified as
        # positive, that is, tot - i.
        tp_fp = np.arange(self.tot, -1, -1)
        tp_fp[-1] = 1  # avoid division by zero
        prec = self.tp / tp_fp
        prec[-1] = prec[-2] if self.tot else np.nan
        return prec

    def recall(self):
        """Recall curve"""
        return self.sensitivity()

    def ppv(self):
        """PPV curve; see the comment at :obj:`precision`"""
        return self.precision()

    def npv(self):
        """
        NPV curve

        The first value is undefined (no negative instances). To avoid this,
        we copy the second value into the first.

        If there are no data instances at all, there is no second value to
        copy and the single point of the curve is NaN.
        """
        # tn + fn at threshold i is the number of instances classified as
        # negative, that is, i.
        tn_fn = np.arange(self.tot + 1)
        tn_fn[0] = 1  # avoid division by zero
        npv = self.tn / tn_fn
        npv[0] = npv[1] if self.tot else np.nan
        return npv

    def fpr(self):
        """FPR curve"""
        return self.fp / self.n

    def tpr(self):
        """TPR curve"""
        return self.sensitivity()