# Source code for test.test_losses

"""
Unit tests for the loss functions and the DCG lambda weights imported from
``deletor.losses``.
"""
# Python Modules
import logging
import math
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf
import deletor.tfutils as tfutils

# Deliberately placed before the project imports below.
# NOTE(review): presumably the GPU has to be disabled before any TensorFlow
# device is initialised -- confirm against deletor.tfutils.
tfutils.disable_gpu()

# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.losses import RankingCrossEntropy, masked_softmax, MeanSquaredError, MultiLoss, \
    RankingSoftmax, ApproximateNormalizedDiscountedCumulativeGain, DCGLambdaWeight, \
    ApproximateBiDiNormalizedDiscountedCumulativeGain

# Print arrays with 8 decimals, without scientific notation and effectively
# without truncation or line wrapping, so array-comparison failures are readable.
np.set_printoptions(precision=8, suppress=True, edgeitems=20, linewidth=10000)

# Module-level side effect: configures the root logger at DEBUG level with a
# timestamp / logger name / line number / level prefix.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)-15s [%(name)s]:%(lineno)d %(levelname)s %(message)s'
)


class TestLosses(unittest.TestCase):
    """
    Regression tests for the loss functions imported from ``deletor.losses``.

    The fixtures below are ragged batches of five lists with lengths
    4, 6, 2, 3 and 3.  Every test densifies them with
    ``to_tensor(MIN_FLOAT_32)``, so ``MIN_FLOAT_32`` marks the padded
    positions.  Hard-coded expected values are regression constants; their
    derivation is not shown in this file.
    """
    # Labels per list; row 4 contains negative labels and row 5 is all zeros.
    y_true = tf.ragged.constant([[4., 0., 3., 1.],
                                 [1., 2., 3., 0., 4., 4.],
                                 [0., 4.],
                                 [-1., -2., 3.],
                                 [0., 0., 0.]])
    # Predicted scores; row lengths match ``y_true``.
    y_pred = tf.ragged.constant([[3.5,  2.2, 0.5, 1.0],
                                 [1.7,  2.3, 2.1, 1.1, 0.1, 1.0],
                                 [3.8,  0.8],
                                 [1.5, -1.0, 1.0],
                                 [0.1,  1.0, 0.3]])

    # Row-wise softmax of ``y_true`` over the unpadded entries, padded to the
    # longest list (6) with zeros.
    exp_softmax = [[0.69638747, 0.01275478, 0.25618663, 0.03467109, 0., 0.],
                   [0.01936247, 0.05263266, 0.14307041, 0.00712306, 0.3889057, 0.3889057],
                   [0.01798621, 0.98201376, 0., 0., 0., 0.],
                   [0.01786798, 0.00657326, 0.9755587, 0., 0., 0.],
                   [0.33333334, 0.33333334, 0.33333334, 0., 0., 0.]]

    def test_masked_softmax(self):
        """``masked_softmax`` of the padded labels must match ``exp_softmax``."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        # 1.0 where a real value is present, 0.0 at padded positions.
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)

        act_softmax = masked_softmax(y_true, mask)

        np.testing.assert_array_almost_equal(act_softmax, self.exp_softmax)

    def test_ranking_cross_entropy(self):
        """Compare ``RankingCrossEntropy`` with a NumPy reference in three modes."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)
        exp_softmax = masked_softmax(y_pred, mask).numpy()

        # Zero probabilities give log(0) = -inf; nan_to_num maps every
        # non-finite entry to 0 so those positions drop out of the sums below.
        # The divide-by-zero warning is expected, hence the scoped errstate.
        with np.errstate(divide='ignore'):
            log_p = np.nan_to_num(np.log(exp_softmax), nan=0.0, posinf=0.0, neginf=0.0)

        # Reference per-list cross entropy, shared by all three cases.
        exp_per_list = -np.sum(y_true.numpy() * log_p, axis=1)

        # Not normalized or reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=False)

        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_per_list, decimal=5)

        # Normalized but not reduced
        loss_fn = RankingCrossEntropy(normalize=True, reduce=False)

        # The all-zero label row yields 0 / 0 = nan, which nan_to_num turns
        # into 0; the resulting warning is expected and silenced here only.
        with np.errstate(divide='ignore', invalid='ignore'):
            exp_loss = np.nan_to_num(
                exp_per_list / np.sum(mask * np.abs(y_true), axis=1)
            )
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

        # Reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=True)

        exp_loss = np.mean(exp_per_list)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

    def test_approximate_ndcg(self):
        """Reduced approximate-NDCG loss on the shared fixture (regression value)."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)

        exp_loss = -2.205779552459717
        # For reference: -0.4411559104919434 is this value divided by 5 (the
        # number of lists), i.e. what a mean over the lists would give.
        act_loss = loss_fn(y_true, y_pred)
        self.assertAlmostEqual(act_loss, exp_loss)

    def test_tfr_approximate_ndcg(self):
        """
        Reduced approximate-NDCG loss on a small dense 3x3 batch.

        NOTE(review): the ``tfr`` prefix and the comments below suggest the
        expected value comes from TensorFlow Ranking -- confirm the source.
        """
        # expected tfr_loss: -1.5697249174118042
        scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
        # ranks= [[1,    3,    2],   [3,  2,   1],    [2,  1,    3]]
        labels = [[0., 2., 1.], [1., 0., 3.], [0., 0., 0.]]
        # Reference data that this test does not pass to the loss:
        # weights = [[2.], [1.], [1.]]
        # example_weights = [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]

        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)

        # TFR uses reduce_sum, but I think it should be reduce mean.
        # Note: reduce_mean seems to work better, but could just be a matter
        # of tuning the learning rate based on the magnitude of the loss.
        exp_loss = -1.5697249174118042
        act_loss = loss_fn(tf.constant(labels), tf.constant(scores))

        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_approximate_bidi_ndcg(self):
        """Reduced bi-directional approximate-NDCG loss (regression value)."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        loss_fn = ApproximateBiDiNormalizedDiscountedCumulativeGain(reduce=True)

        act_loss = loss_fn(y_true, y_pred)
        exp_loss = 0.0842013955116272
        self.assertAlmostEqual(act_loss, exp_loss)

    def test_mean_squared_error(self):
        """Compare ``MeanSquaredError`` with a NumPy reference, per list and reduced."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        # No reduction
        loss_fn = MeanSquaredError(reduce=False, weight_by_labels=False)
        # Both tensors carry the same padding value at the same positions, so
        # padded entries contribute a squared difference of 0 to the row sums.
        exp_sqr = np.square(y_true - y_pred)
        exp_sum = np.sum(exp_sqr, axis=1)
        # Number of unpadded entries in each list.
        exp_n = np.sum(y_true > MIN_FLOAT_32, axis=1)
        exp_mse = exp_sum / exp_n
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, exp_mse)

        # With reduction
        loss_fn = MeanSquaredError(reduce=True, weight_by_labels=False)

        exp_mse = np.mean(exp_mse)
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, exp_mse)

    def test_ranking_softmax_loss(self):
        """``RankingSoftmax`` per list, and its mean when ``reduce=True``."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        # Regression values for the five lists, reused by both cases below.
        exp_per_list = [14.216904, 30.82149, 12.194349,  0., 0.]

        loss_fn = RankingSoftmax(reduce=False)

        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_per_list)

        loss_fn = RankingSoftmax(reduce=True)

        exp_loss = np.mean(exp_per_list)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_multi_loss(self):
        """``MultiLoss`` over cross entropy + MSE: plain, reduced and weighted."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        rce = RankingCrossEntropy(normalize=False, reduce=False)
        mse = MeanSquaredError(reduce=False, weight_by_labels=False)
        loss_fn = MultiLoss([rce, mse], reduce=False)

        # Not reduced
        exp_loss = [17.051903, 35.289818, 24.53435, 0.25, 0.36666667]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

        # Reduced (the expected scalar is the mean of the five values above)
        loss_fn = MultiLoss([rce, mse], reduce=True)

        exp_loss = 15.498548
        act_loss = loss_fn(y_true, y_pred)
        self.assertAlmostEqual(act_loss, exp_loss)

        # With weights (0.5 for the cross entropy, 2.0 for the MSE)
        loss_fn = MultiLoss([rce, mse], [0.5, 2.0], reduce=False)
        exp_loss = [12.778452, 24.34741, 30.777176, 5.75, 0.73333335]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)


class TestLambdaWeights(unittest.TestCase):
    """
    Tests for ``DCGLambdaWeight.pair_weights`` and ``individual_weights``.

    Every test uses a single list whose items are already at ranks 1..n.
    NOTE(review): the ``tfr`` prefix suggests the expected values mirror the
    TensorFlow Ranking lambda-weight tests -- confirm the source.
    """

    def test_tfr_default(self):
        """Pair weights with the default ``DCGLambdaWeight`` configuration."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()
        exp_weights = [[[0.0000000, 0.5, 0.3333333],
                        [0.5000000, 0.0, 0.5000000],
                        [0.3333333, 0.5, 0.0000000]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_smooth_fraction(self):
        """Pair weights with ``smooth_fraction=1.0``, without and with ``k=1``."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight(smooth_fraction=1.0)

        exp_weights = [[[0., 1. / 2., 2. * 2. / 3.],
                        [1. / 2., 0., 1. / 6.],
                        [2. * 2. / 3., 1. / 6., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

        # Same inputs with k=1: the pair between ranks 2 and 3 gets weight 0.
        lambda_weight = DCGLambdaWeight(k=1, smooth_fraction=1.0)
        exp_weights = [[[0., 1., 2.],
                        [1., 0., 0.],
                        [2., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_topn(self):
        """Pair weights with ``k=1``: the pair between ranks 2 and 3 gets weight 0."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight(k=1)

        exp_weights = [[[0., 1. / 2., 2. * 1. / 2.],
                        [1. / 2., 0., 0.],
                        [2. * 1. / 2., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_invalid_labels(self):
        """Every pair involving the item labelled ``-1.0`` gets weight 0."""
        labels = tf.constant([[2.0, 1.0, -1.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()

        exp_weights = [[[0., 1. / 2., 0.], [1. / 2., 0., 0.], [0., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_gain_and_discount(self):
        """Pair weights with a custom exponential gain and logarithmic discount."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda x: tf.pow(2.0, x) - 1.0,
            discount_fn=lambda r: 1.0 / tf.math.log1p(r)
        )

        # Gain difference (2^2 - 1) - (2^1 - 1) = 2, times the discount
        # difference between rank 1 and rank 2.
        pair_weight = 2. * (1. / math.log(2.) - 1. / math.log(3.))
        exp_weights = [[[0., pair_weight],
                        [pair_weight, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_normalized(self):
        """Pair weights with ``normalized=True`` are divided by ``max_dcg``."""
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)

        # 2.5 = 2 / 1 + 1 / 2, i.e. the labels in descending order with the
        # label as gain and 1 / rank as discount.
        max_dcg = 2.5
        pair_weight = 1. / 2. / max_dcg
        exp_weights = [[[0., pair_weight], [pair_weight, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_individual_weights(self):
        """Per-item weights with ``normalized=True``."""
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)

        max_dcg = 2.5
        # label / max_dcg / rank for each of the two items.
        exp_weights = [[1. / max_dcg / 1., 2. / max_dcg / 2.]]
        act_weights = lambda_weight.individual_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_create_ndcg_lambda_weight(self):
        """Normalized pair weights with exponential gain and logarithmic discount."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda label: tf.pow(2.0, label) - 1.0,
            discount_fn=lambda rank: 1. / tf.math.log1p(rank),
            normalized=True,
            smooth_fraction=0.0
        )

        # Gains 3 and 1 at ranks 1 and 2 under the gain/discount functions above.
        max_dcg = 3.0 / math.log(2.) + 1.0 / math.log(3.)
        pair_weight = 2. * (1. / math.log(2.) - 1. / math.log(3.)) / max_dcg
        exp_weights = [[[0., pair_weight],
                        [pair_weight, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)