# Source code for test.test_metrics

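"""
Unit tests for the ranking metrics in ``deletor.metrics``:
``DiscountedCumulativeGain`` and ``NormalizedDiscountedCumulativeGain``.
"""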
# Python Modules
import logging
import os
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf

# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.metrics import DiscountedCumulativeGain, NormalizedDiscountedCumulativeGain

# Configure TensorFlow's native logging through its environment variables.
# NOTE(review): TensorFlow has already been imported at this point; confirm
# that these settings still take effect when assigned after the import.
os.environ.update({
    'TF_CPP_MIN_VLOG_LEVEL': '2',
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

# Restrict the Python-side 'tensorflow' logger to warnings and above.
_tf_logger = logging.getLogger('tensorflow')
_tf_logger.setLevel(logging.WARNING)

# tf.compat.v1.disable_eager_execution()


class TestMetrics(tf.test.TestCase):
    """
    Test cases for metrics. Test names that are prefixed with `tfr` are
    a subset of the test cases from the tensorflow ranking project to verify
    that the results are the same. However, these tests are not particularly
    robust and do not test padded queries, so additional tests are also
    included.
    """
    # Ragged relevance labels: one row per query, rows of differing length.
    y_true = tf.ragged.constant([[4, 0, 3, 1],
                                 [1, 2, 3, 0, 4, 4],
                                 [0, 4],
                                 [-1, -2, 3],
                                 [0, 0, 0]])
    # Ragged predicted scores with the same row lengths as ``y_true``.
    y_pred = tf.ragged.constant([[3.5,  2.2, 0.5, 1.0],
                                 [1.7,  2.3, 2.1, 1.1, 0.1, 1.0],
                                 [3.8,  0.8],
                                 [1.5, -1.0, 1.0],
                                 [0.1,  1.0, 0.3]])
    # Dense view of the scores; short rows are padded with MIN_FLOAT_32.
    y_pred_dense = y_pred.to_tensor(MIN_FLOAT_32)
    # Column indices that order each row of ``y_pred_dense`` by descending
    # score (assumes MIN_FLOAT_32 is below every real score, so the padding
    # columns sort last -- TODO confirm against deletor.constants).
    exp_rank = np.array([[0, 1, 3, 2, 4, 5],
                         [1, 2, 0, 3, 5, 4],
                         [0, 1, 2, 3, 4, 5],
                         [0, 2, 1, 3, 4, 5],
                         [1, 2, 0, 3, 4, 5]])

    @classmethod
    def expected_dcg(cls, y_true, y_pred):
        """
        Compute a NumPy reference DCG for every row of a ragged batch.

        The gain of an item is ``2 ** relevance - 1`` and the discount of the
        item at 1-based position ``i`` is ``log2(i + 1)``.

        :param y_true: ragged tensor of relevance labels, one row per query.
        :param y_pred: ragged tensor of scores with the same row lengths as
            ``y_true``; it is cast to float32 before being padded.
        :return: 1-D NumPy array holding one DCG value per row.
        """
        scores = tf.cast(y_pred, tf.float32).to_tensor(MIN_FLOAT_32).numpy()
        # A stable sort of the negated scores yields a descending order in
        # which tied scores keep their original relative position.
        rank = np.argsort(-scores, kind='stable')
        # Labels are padded with the default padding value of ``to_tensor``.
        labels = y_true.to_tensor().numpy()
        ranking = np.take_along_axis(labels, rank, axis=-1)
        exp_gain = np.power(2.0, ranking) - 1
        # Size the discount vector from the ranking itself rather than from
        # an unrelated fixture, so inputs of any padded width are supported.
        exp_discount = np.log2(np.arange(2, ranking.shape[1] + 2))
        return np.sum(exp_gain / exp_discount, axis=-1)

    @staticmethod
    def _linear_gain(rel):
        """Gain function that uses the relevance itself, cast to float32."""
        return tf.cast(rel, tf.float32)

    @staticmethod
    def _linear_discount(p):
        """Discount function returning ``[1, ..., p - 1]`` as float32."""
        return tf.cast(tf.range(1, p), tf.float32)

    def test_dcg(self):
        """DCG on the ragged fixture matches the NumPy reference."""
        y_true = self.y_true
        y_pred = self.y_pred_dense

        metric = DiscountedCumulativeGain()

        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        act_dcg = metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_dcg, exp_dcg)

    def test_ndcg(self):
        """NDCG on the ragged fixture matches reference DCG / ideal DCG."""
        y_true = self.y_true
        y_pred = self.y_pred_dense

        metric = NormalizedDiscountedCumulativeGain()

        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        # The ideal DCG ranks the items by their own labels.
        exp_idcg = self.expected_dcg(self.y_true, self.y_true)
        # Rows whose ideal DCG is zero are expected to yield an NDCG of zero.
        exp_ndcg = np.divide(exp_dcg, exp_idcg, out=np.zeros_like(exp_dcg), where=exp_idcg != 0)
        act_ndcg = metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_ndcg, exp_ndcg)

    def test_tfr_dcg(self):
        """DCG without weights, with weights, and with custom functions."""
        y_true = tf.constant([[0.0, 0.0, 1.0],
                              [0.0, 1.0, 2.0]])
        y_pred = tf.constant([[1.0, 3.0, 2.0],
                              [1.0, 2.0, 3.0]])
        weights = tf.constant([[1., 1., 1.],
                               [2., 2., 1.]])

        dcg_metric = DiscountedCumulativeGain()

        # Unweighted DCG
        expected_dcg = [0.63092977, 3.6309297]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

        # Weighted DCG
        expected_dcg = [0.63092977, 4.2618594]
        actual_dcg = dcg_metric(y_true, y_pred, weights)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

        # Different functions
        dcg_metric = DiscountedCumulativeGain(gain_fn=self._linear_gain,
                                              discount_fn=self._linear_discount)
        expected_dcg = [0.5, 2.5]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

    def test_tfr_ndcg(self):
        """NDCG with the default and with custom gain/discount functions."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])

        ndcg_metric = NormalizedDiscountedCumulativeGain()

        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

        # Different functions
        ndcg_metric = NormalizedDiscountedCumulativeGain(gain_fn=self._linear_gain,
                                                         discount_fn=self._linear_discount)

        expected_ndcg = [0.5, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_zero_relevance(self):
        """A query whose labels are all zero is expected to score 0."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])

        ndcg_metric = NormalizedDiscountedCumulativeGain()

        expected_ndcg = [0, 1]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights(self):
        """Weighted NDCG, both with ``k=1`` and with the default settings."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])

        ndcg_metric_0 = NormalizedDiscountedCumulativeGain()
        ndcg_metric_1 = NormalizedDiscountedCumulativeGain(k=1)

        expected_ndcg = [0, 1.]
        actual_ndcg = ndcg_metric_1(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric_0(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights_zero_relevance(self):
        """Weighted NDCG when one query has only zero-relevance labels."""
        # expected_ndcg_zero_rel_1: 0.0
        # expected_ndcg_zero_rel_2: 1.0
        # expected_ndcg_zero_rel: 0.5
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])

        ndcg_metric = NormalizedDiscountedCumulativeGain()

        expected_ndcg = [0.0, 1.0]
        actual_ndcg = ndcg_metric(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)


# Run the test cases with the unittest runner when executed as a script.
if __name__ == '__main__':
    unittest.main()