Skip to content

Defaults

eva.metrics.BinaryClassificationMetrics

Bases: MetricCollection

Default metrics for binary classification tasks.

Parameters:

Name Type Description Default
threshold float

Threshold for transforming probability to binary (0,1) predictions.

0.5
ignore_index int | None

Specifies a target value that is ignored and does not contribute to the metric calculation.

None
prefix str | None

A string to prepend to the keys of the output dict.

None
postfix str | None

A string to append after the keys of the output dict.

None
Source code in src/eva/core/metrics/defaults/classification/binary.py
def __init__(
    self,
    threshold: float = 0.5,
    ignore_index: int | None = None,
    prefix: str | None = None,
    postfix: str | None = None,
) -> None:
    """Builds the default metric collection for binary classification.

    Args:
        threshold: Threshold for transforming probability to binary (0,1) predictions
        ignore_index: Specifies a target value that is ignored and does not
            contribute to the metric calculation.
        prefix: A string to append in front of the keys of the output dict.
        postfix: A string to append after the keys of the output dict.
    """
    # Keyword arguments shared by every threshold-based metric below.
    shared_kwargs = {"threshold": threshold, "ignore_index": ignore_index}

    default_metrics = [
        # AUROC operates on scores directly, so it takes no threshold.
        classification.BinaryAUROC(ignore_index=ignore_index),
        classification.BinaryAccuracy(**shared_kwargs),
        binary_balanced_accuracy.BinaryBalancedAccuracy(**shared_kwargs),
        classification.BinaryF1Score(**shared_kwargs),
        classification.BinaryPrecision(**shared_kwargs),
        classification.BinaryRecall(**shared_kwargs),
    ]

    # Grouping the statwise (threshold-based) metrics lets the collection
    # share their internal state; AUROC keeps its own group.
    super().__init__(
        metrics=default_metrics,
        prefix=prefix,
        postfix=postfix,
        compute_groups=[
            [
                "BinaryAccuracy",
                "BinaryBalancedAccuracy",
                "BinaryF1Score",
                "BinaryPrecision",
                "BinaryRecall",
            ],
            [
                "BinaryAUROC",
            ],
        ],
    )

eva.metrics.MulticlassClassificationMetrics

Bases: MetricCollection

Default metrics for multi-class classification tasks.

Parameters:

Name Type Description Default
num_classes int

Integer specifying the number of classes.

required
average Literal['macro', 'weighted', 'none']

Defines the reduction that is applied over labels.

'macro'
ignore_index int | None

Specifies a target value that is ignored and does not contribute to the metric calculation.

None
prefix str | None

A string to prepend to the keys of the output dict.

None
postfix str | None

A string to append after the keys of the output dict.

None
input_type Literal['logits', 'discrete']

Type of input predictions - "logits" for probabilities/logits or "discrete" for discrete class predictions. Determines which metrics are applicable.

'logits'
Source code in src/eva/core/metrics/defaults/classification/multiclass.py
def __init__(
    self,
    num_classes: int,
    average: Literal["macro", "weighted", "none"] = "macro",
    ignore_index: int | None = None,
    prefix: str | None = None,
    postfix: str | None = None,
    input_type: Literal["logits", "discrete"] = "logits",
) -> None:
    """Builds the default metric collection for multi-class classification.

    Args:
        num_classes: Integer specifying the number of classes.
        average: Defines the reduction that is applied over labels.
        ignore_index: Specifies a target value that is ignored and does not
            contribute to the metric calculation.
        prefix: A string to append in front of the keys of the output dict.
        postfix: A string to append after the keys of the output dict.
        input_type: Type of input predictions - "logits" for probabilities/logits
            or "discrete" for discrete class predictions. Determines which metrics
            are applicable.
    """
    # Every metric below is configured identically.
    common_kwargs = {
        "num_classes": num_classes,
        "average": average,
        "ignore_index": ignore_index,
    }

    metric_types = [
        classification.MulticlassAccuracy,
        classification.MulticlassF1Score,
        classification.MulticlassPrecision,
        classification.MulticlassRecall,
    ]
    metrics = [metric_type(**common_kwargs) for metric_type in metric_types]

    # The statwise metrics share internal state within one compute group.
    compute_groups = [
        [
            "MulticlassAccuracy",
            "MulticlassF1Score",
            "MulticlassPrecision",
            "MulticlassRecall",
        ]
    ]

    # AUROC needs continuous scores, so it is only added for logit inputs;
    # it keeps its own compute group.
    if input_type == "logits":
        metrics.append(classification.MulticlassAUROC(**common_kwargs))
        compute_groups.append(["MulticlassAUROC"])

    super().__init__(
        metrics=metrics,
        prefix=prefix,
        postfix=postfix,
        compute_groups=compute_groups,
    )