"""
Documentation
"""
# Python Modules
import logging
import os
import unittest
# 3rd Party Modules
import numpy as np
import tensorflow as tf
# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.metrics import DiscountedCumulativeGain, NormalizedDiscountedCumulativeGain
# Set both TensorFlow native-logging environment variables to '2' and raise
# the Python-side 'tensorflow' logger threshold to WARNING for the test run.
# NOTE(review): these variables are assigned after `import tensorflow`;
# confirm TensorFlow still honours them at that point.
os.environ.update({
    'TF_CPP_MIN_VLOG_LEVEL': '2',
    'TF_CPP_MIN_LOG_LEVEL': '2',
})
_tf_logger = logging.getLogger('tensorflow')
_tf_logger.setLevel(logging.WARNING)
# tf.compat.v1.disable_eager_execution()
class TestMetrics(tf.test.TestCase):
    """
    Test cases for the DCG and NDCG metrics.

    Test names that are prefixed with `tfr` are a subset of the test cases
    from the TensorFlow Ranking project, included to verify that the results
    are the same. However, those tests are not particularly robust and do not
    test padded queries, so additional tests on ragged fixtures are also
    included.
    """

    # Ragged relevance labels: one row per query, rows differ in length.
    y_true = tf.ragged.constant([[4, 0, 3, 1],
                                 [1, 2, 3, 0, 4, 4],
                                 [0, 4],
                                 [-1, -2, 3],
                                 [0, 0, 0]])
    # Ragged predicted scores, row-aligned with `y_true`.
    y_pred = tf.ragged.constant([[3.5, 2.2, 0.5, 1.0],
                                 [1.7, 2.3, 2.1, 1.1, 0.1, 1.0],
                                 [3.8, 0.8],
                                 [1.5, -1.0, 1.0],
                                 [0.1, 1.0, 0.3]])
    # Dense version of the scores; padded slots are filled with MIN_FLOAT_32.
    y_pred_dense = y_pred.to_tensor(MIN_FLOAT_32)
    # Per-row item indices of `y_pred_dense` in descending score order
    # (padded slots last). Kept as a hand-written reference fixture.
    exp_rank = np.array([[0, 1, 3, 2, 4, 5],
                         [1, 2, 0, 3, 5, 4],
                         [0, 1, 2, 3, 4, 5],
                         [0, 2, 1, 3, 4, 5],
                         [1, 2, 0, 3, 4, 5]])

    @classmethod
    def expected_dcg(cls, y_true, y_pred):
        """
        Compute reference DCG values with NumPy.

        Each row of `y_true` is reordered by descending `y_pred` score and
        scored as ``sum((2 ** label - 1) / log2(position + 1))`` where
        positions start at 1.

        :param y_true: ragged tensor of relevance labels, one row per query.
        :param y_pred: ragged tensor of scores, row-aligned with `y_true`.
        :return: NumPy array holding one DCG value per row.
        """
        # Padded score slots are filled with MIN_FLOAT_32 (presumably the
        # lowest float32 value, so padding ranks last -- confirm in constants).
        scores = tf.cast(y_pred, tf.float32).to_tensor(MIN_FLOAT_32).numpy()
        # Stable sort keeps the original item order among tied scores.
        order = np.argsort(-scores, kind='stable')
        # Labels are padded with the default 0, whose gain 2**0 - 1 is 0.
        labels = y_true.to_tensor().numpy()
        ranked = np.take_along_axis(labels, order, axis=-1)
        gain = np.power(2.0, ranked) - 1
        # Size the discount from the data actually being ranked rather than
        # from the unrelated `exp_rank` fixture.
        discount = np.log2(np.arange(2, ranked.shape[1] + 2))
        return np.sum(gain / discount, axis=-1)

    def test_dcg(self):
        """DCG on the ragged fixtures matches the NumPy reference."""
        y_true = self.y_true
        y_pred = self.y_pred_dense
        metric = DiscountedCumulativeGain()
        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        act_dcg = metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_dcg, exp_dcg)

    def test_ndcg(self):
        """NDCG on the ragged fixtures matches DCG / ideal DCG."""
        y_true = self.y_true
        y_pred = self.y_pred_dense
        metric = NormalizedDiscountedCumulativeGain()
        exp_dcg = self.expected_dcg(self.y_true, self.y_pred)
        # Ideal DCG: rank the labels by the labels themselves.
        exp_idcg = self.expected_dcg(self.y_true, self.y_true)
        # Rows whose ideal DCG is zero are expected to yield 0 instead of
        # a division by zero.
        exp_ndcg = np.divide(exp_dcg, exp_idcg,
                             out=np.zeros_like(exp_dcg),
                             where=exp_idcg != 0)
        act_ndcg = metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_ndcg, exp_ndcg)

    def test_tfr_dcg(self):
        """DCG: unweighted, weighted, and with custom gain/discount functions."""
        y_true = tf.constant([[0.0, 0.0, 1.0],
                              [0.0, 1.0, 2.0]])
        y_pred = tf.constant([[1.0, 3.0, 2.0],
                              [1.0, 2.0, 3.0]])
        weights = tf.constant([[1., 1., 1.],
                               [2., 2., 1.]])
        dcg_metric = DiscountedCumulativeGain()
        # Unweighted DCG
        expected_dcg = [0.63092977, 3.6309297]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)
        # Weighted DCG
        expected_dcg = [0.63092977, 4.2618594]
        actual_dcg = dcg_metric(y_true, y_pred, weights)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)
        # Different functions
        gain_fn = lambda rel: tf.cast(rel, tf.float32)
        discount_fn = lambda p: tf.cast(tf.range(1, p), tf.float32)
        dcg_metric = DiscountedCumulativeGain(gain_fn=gain_fn,
                                              discount_fn=discount_fn)
        expected_dcg = [0.5, 2.5]
        actual_dcg = dcg_metric(y_true, y_pred)
        np.testing.assert_array_almost_equal(actual_dcg, expected_dcg)

    def test_tfr_ndcg(self):
        """NDCG with default and with custom gain/discount functions."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)
        # Different functions
        gain_fn = lambda rel: tf.cast(rel, tf.float32)
        discount_fn = lambda p: tf.cast(tf.range(1, p), tf.float32)
        ndcg_metric = NormalizedDiscountedCumulativeGain(gain_fn=gain_fn,
                                                         discount_fn=discount_fn)
        expected_ndcg = [0.5, 1.]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_zero_relevance(self):
        """A query whose labels are all zero is expected to score NDCG 0."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0, 1]
        actual_ndcg = ndcg_metric(labels, scores)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights(self):
        """Weighted NDCG for a metric built with `k=1` and a default one."""
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 1.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])
        ndcg_metric_0 = NormalizedDiscountedCumulativeGain()
        ndcg_metric_1 = NormalizedDiscountedCumulativeGain(k=1)
        expected_ndcg = [0, 1.]
        actual_ndcg = ndcg_metric_1(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)
        expected_ndcg = [0.63092977, 1.]
        actual_ndcg = ndcg_metric_0(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)

    def test_tfr_ndcg_with_weights_zero_relevance(self):
        """Weighted NDCG when one query has only zero-relevance labels."""
        # expected_ndcg_zero_rel_1: 0.0
        # expected_ndcg_zero_rel_2: 1.0
        # expected_ndcg_zero_rel: 0.5
        scores = tf.constant([[1., 3., 2.], [1., 2., 3.]])
        labels = tf.constant([[0., 0., 0.], [0., 1., 2.]])
        weights = tf.constant([[1., 2., 3.], [4., 5., 6.]])
        ndcg_metric = NormalizedDiscountedCumulativeGain()
        expected_ndcg = [0.0, 1.0]
        actual_ndcg = ndcg_metric(labels, scores, weights)
        np.testing.assert_array_almost_equal(actual_ndcg, expected_ndcg)
# Run the test suite only when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()