# Source code for test.test_metrics

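"""
Unit tests for the ranking metrics in ``deletor.metrics``:
``DiscountedCumulativeGain`` and ``NormalizedDiscountedCumulativeGain``.
"""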
# Python Modules
import logging
import os
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf

# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.metrics import DiscountedCumulativeGain, NormalizedDiscountedCumulativeGain

# Configure TensorFlow log verbosity: two TF_CPP_* environment variables plus
# the 'tensorflow' Python logger, which is raised to WARNING.
# NOTE(review): these variables are assigned after `import tensorflow` above;
# presumably TensorFlow may already have read them by then -- confirm the
# settings actually take effect.
os.environ.update({
    'TF_CPP_MIN_VLOG_LEVEL': '2',
    'TF_CPP_MIN_LOG_LEVEL': '2',
})

logging.getLogger(name='tensorflow').setLevel(level=logging.WARNING)

# tf.compat.v1.disable_eager_execution()


class TestMetrics(tf.test.TestCase):
    """
    Test cases for metrics.

    Test names that are prefixed with `tfr` are a subset of the test cases from the tensorflow ranking
    project to verify that the results are the same. However, these tests are not particularly robust and
    do not test padded queries, so additional tests are also included.
    """

    # Ragged relevance labels, one row per query; rows have different lengths.
    y_true = tf.ragged.constant([[4, 0, 3, 1], [1, 2, 3, 0, 4, 4], [0, 4], [-1, -2, 3], [0, 0, 0]])
    # Ragged predicted scores with the same row lengths as `y_true`.
    y_pred = tf.ragged.constant([[3.5, 2.2, 0.5, 1.0],
                                 [1.7, 2.3, 2.1, 1.1, 0.1, 1.0],
                                 [3.8, 0.8],
                                 [1.5, -1.0, 1.0],
                                 [0.1, 1.0, 0.3]])
    # Dense version of the scores; short rows are padded with MIN_FLOAT_32.
    y_pred_dense = y_pred.to_tensor(MIN_FLOAT_32)
    # Reference fixture; presumably the per-query item order by descending
    # padded score -- TODO confirm. Not read by the tests in this class.
    exp_rank = np.array([[0, 1, 3, 2, 4, 5],
                         [1, 2, 0, 3, 5, 4],
                         [0, 1, 2, 3, 4, 5],
                         [0, 2, 1, 3, 4, 5],
                         [1, 2, 0, 3, 4, 5]])

    @staticmethod
    def _linear_gain(rel):
        """Alternative gain function for the `tfr` tests: the relevance cast to float32."""
        return tf.cast(rel, tf.float32)

    @staticmethod
    def _rank_discount(p):
        """
        Alternative discount function for the `tfr` tests: ``[1, 2, ..., p - 1]`` as float32.

        NOTE(review): the meaning of ``p`` is defined by ``deletor.metrics``; presumably it is the
        list size plus one -- confirm against the metric implementation.
        """
        return tf.cast(tf.range(1, p), tf.float32)

    @classmethod
    def expected_dcg(cls, y_true, y_pred):
        """
        Compute a NumPy reference DCG for every query.

        The scores are cast to float32 and padded with ``MIN_FLOAT_32``, then each query's items are
        ordered by descending score (stable sort, so ties keep their original order). Labels are padded
        with the ``to_tensor`` default, gathered in that order, turned into gains ``2 ** label - 1`` and
        divided by the discount ``log2(position + 1)`` with 1-based positions.

        :param y_true: Ragged tensor of relevance labels, one row per query.
        :param y_pred: Ragged tensor of scores used to rank the items of each query.
        :return: 1-D NumPy array holding one DCG value per query.
        """
        scores = tf.cast(y_pred, tf.float32).to_tensor(MIN_FLOAT_32).numpy()
        order = np.argsort(-scores, kind='stable')
        labels = y_true.to_tensor().numpy()
        ranking = np.take_along_axis(labels, order, axis=1)
        exp_gain = np.power(2.0, ranking) - 1
        # Size the discount from the ranking itself so the helper works for any padded width,
        # not only for inputs that happen to match the width of the `exp_rank` fixture.
        exp_discount = np.log2(np.arange(2, ranking.shape[1] + 2))
        return np.sum(exp_gain / exp_discount, axis=-1)

    def test_dcg(self):
        """DCG on ragged labels and padded dense scores matches the NumPy reference."""
        metric = DiscountedCumulativeGain()
        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        act_dcg = metric(self.y_true, self.y_pred_dense)
        np.testing.assert_array_almost_equal(act_dcg, exp_dcg)

    def test_ndcg(self):
        """NDCG on ragged labels and padded dense scores matches the NumPy reference."""
        metric = NormalizedDiscountedCumulativeGain()
        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        # Ideal DCG: rank the items by their own labels.
        exp_idcg = self.expected_dcg(self.y_true, self.y_true)
        # Queries whose ideal DCG is zero are expected to score 0 instead of dividing by zero.
        exp_ndcg = np.divide(exp_dcg, exp_idcg, out=np.zeros_like(exp_dcg), where=exp_idcg != 0)
        act_ndcg = metric(self.y_true, self.y_pred_dense)
        np.testing.assert_array_almost_equal(act_ndcg, exp_ndcg)

    def test_tfr_dcg(self):
        """DCG on dense inputs: unweighted, weighted, and with custom gain/discount functions."""
        y_true = tf.constant([[0.0, 0.0, 1.0], [0.0, 1.0, 2.0]])
        y_pred = tf.constant([[1.0, 3.0, 2.0], [1.0, 2.0, 3.0]])
        weights = tf.constant([[1., 1., 1.], [2., 2., 1.]])
        dcg_metric = DiscountedCumulativeGain()

        # Unweighted DCG
        expected_dcg = [0.63092977, 3.6309297]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

        # Weighted DCG
        expected_dcg = [0.63092977, 4.2618594]
        actual_dcg = dcg_metric(y_true, y_pred, weights)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

        # Different functions
        dcg_metric = DiscountedCumulativeGain(gain_fn=self._linear_gain, discount_fn=self._rank_discount)
        expected_dcg = [0.5, 2.5]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

    def test_tfr_ndcg(self):
        """NDCG on dense inputs with the default and with custom gain/discount functions."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

        # Different functions
        ndcg_metric = NormalizedDiscountedCumulativeGain(gain_fn=self._linear_gain,
                                                         discount_fn=self._rank_discount)
        expected_ndcg = [0.5, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_zero_relevance(self):
        """A query whose labels are all zero is expected to yield an NDCG of 0."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0, 1]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights(self):
        """Weighted NDCG, both with the default settings and with ``k=1``."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])
        ndcg_metric_0 = NormalizedDiscountedCumulativeGain()
        # NOTE(review): k=1 presumably restricts the metric to the top-ranked item -- confirm.
        ndcg_metric_1 = NormalizedDiscountedCumulativeGain(k=1)

        expected_ndcg = [0, 1.]
        actual_ndcg = ndcg_metric_1(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric_0(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights_zero_relevance(self):
        """Weighted NDCG where the first query has only zero labels."""
        # expected_ndcg_zero_rel_1: 0.0
        # expected_ndcg_zero_rel_2: 1.0
        # expected_ndcg_zero_rel: 0.5
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0.0, 1.0]
        actual_ndcg = ndcg_metric(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()