# Source code for test.test_losses

"""
Documentation
"""
# Python Modules
import logging
import math
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf
import deletor.tfutils as tfutils

# Runs at import time, before the project modules below are imported.
# NOTE(review): presumably keeps TensorFlow off the GPU for this test run —
# confirm against deletor.tfutils.
tfutils.disable_gpu()

# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.losses import RankingCrossEntropy, masked_softmax, MeanSquaredError, MultiLoss, \
    RankingSoftmax, ApproximateNormalizedDiscountedCumulativeGain, DCGLambdaWeight, \
    ApproximateBiDiNormalizedDiscountedCumulativeGain

# Print arrays with 8 decimals, without scientific notation, and on very wide
# lines so that assertion diffs are not wrapped or heavily truncated.
np.set_printoptions(
    linewidth=10000,
    edgeitems=20,
    suppress=True,
    precision=8,
)

# Root logger: DEBUG and above, with timestamp, logger name and line number.
logging.basicConfig(
    format='%(asctime)-15s [%(name)s]:%(lineno)d %(levelname)s %(message)s',
    level=logging.DEBUG,
)


class TestLosses(unittest.TestCase):
    """Check the loss functions imported from ``deletor.losses``.

    The ragged fixtures below are converted to dense tensors inside each test
    with ``to_tensor(MIN_FLOAT_32)``, so ``MIN_FLOAT_32`` marks the padding.
    """

    # Ragged labels: five lists of differing length.
    y_true = tf.ragged.constant([
        [4., 0., 3., 1.],
        [1., 2., 3., 0., 4., 4.],
        [0., 4.],
        [-1., -2., 3.],
        [0., 0., 0.],
    ])

    # Ragged scores with the same row lengths as ``y_true``.
    y_pred = tf.ragged.constant([
        [3.5, 2.2, 0.5, 1.0],
        [1.7, 2.3, 2.1, 1.1, 0.1, 1.0],
        [3.8, 0.8],
        [1.5, -1.0, 1.0],
        [0.1, 1.0, 0.3],
    ])

    # Expected result of ``masked_softmax`` applied to the padded ``y_true``;
    # padded positions are expected to be 0.
    exp_softmax = [
        [0.69638747, 0.01275478, 0.25618663, 0.03467109, 0., 0.],
        [0.01936247, 0.05263266, 0.14307041, 0.00712306, 0.3889057, 0.3889057],
        [0.01798621, 0.98201376, 0., 0., 0., 0.],
        [0.01786798, 0.00657326, 0.9755587, 0., 0., 0.],
        [0.33333334, 0.33333334, 0.33333334, 0., 0., 0.],
    ]

    def test_masked_softmax(self):
        """``masked_softmax`` of the padded labels matches ``exp_softmax``."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        # 1.0 where the entry differs from the padding value, 0.0 elsewhere.
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)

        act_softmax = masked_softmax(y_true, mask)

        np.testing.assert_array_almost_equal(act_softmax, self.exp_softmax)

    def test_ranking_cross_entropy(self):
        """``RankingCrossEntropy`` matches a NumPy re-computation.

        Covers the plain, the normalized and the reduced variants.
        """
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)

        exp_softmax = masked_softmax(y_pred, mask).numpy()
        # Non-finite logs (e.g. log(0)) are replaced by 0 so that those
        # positions add nothing to the sums below.
        log_p = np.nan_to_num(np.log(exp_softmax), nan=0.0, posinf=0.0, neginf=0.0)
        # Per-list cross entropy shared by all three expectations.
        per_list_loss = -np.sum(y_true.numpy() * log_p, axis=1)

        # Not normalized or reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=False)
        exp_loss = per_list_loss
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

        # Normalized but not reduced
        loss_fn = RankingCrossEntropy(normalize=True, reduce=False)
        # A list whose labels are all zero gives 0 / 0; nan_to_num maps it to 0.
        exp_loss = np.nan_to_num(
            per_list_loss / np.sum(mask * np.abs(y_true), axis=1)
        )
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

        # Reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=True)
        exp_loss = np.mean(per_list_loss)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

    def test_approximate_ndcg(self):
        """Reduced approximate-NDCG loss on the fixtures equals a pinned value."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)

        exp_loss = -2.205779552459717
        # Previously considered expectation (the value above divided by 5):
        # exp_loss = -0.4411559104919434
        act_loss = loss_fn(y_true, y_pred)

        self.assertAlmostEqual(act_loss, exp_loss)

    def test_tfr_approximate_ndcg(self):
        """Approximate-NDCG loss reproduces the reference TFR loss value."""
        # expected tfr_loss: -1.5697249174118042
        scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
        # ranks= [[1, 3, 2], [3, 2, 1], [2, 1, 3]]
        labels = [[0., 2., 1.], [1., 0., 3.], [0., 0., 0.]]
        # weights = [[2.], [1.], [1.]]
        # example_weights = [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]
        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)

        # TFR uses reduce_sum, but I think it should be reduce mean.
        # Note: reduce_mean seems to work better, but could just be a matter
        # of tuning the learning rate based on the magnitude of the loss.
        exp_loss = -1.5697249174118042
        act_loss = loss_fn(tf.constant(labels), tf.constant(scores))

        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_approximate_bidi_ndcg(self):
        """Reduced bi-directional approximate-NDCG loss equals a pinned value."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        loss_fn = ApproximateBiDiNormalizedDiscountedCumulativeGain(reduce=True)

        act_loss = loss_fn(y_true, y_pred)
        exp_loss = 0.0842013955116272

        self.assertAlmostEqual(act_loss, exp_loss)

    def test_mean_squared_error(self):
        """``MeanSquaredError`` matches a NumPy re-computation, with and without reduction."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        # No reduction
        loss_fn = MeanSquaredError(reduce=False, weight_by_labels=False)
        # Both tensors hold the same padding value, so padded positions
        # contribute a squared error of 0.
        exp_sqr = np.square(y_true - y_pred)
        exp_sum = np.sum(exp_sqr, axis=1)
        # Number of entries per list that are above the padding value.
        exp_n = np.sum(y_true > MIN_FLOAT_32, axis=1)
        per_list_mse = exp_sum / exp_n
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, per_list_mse)

        # With reduction
        loss_fn = MeanSquaredError(reduce=True, weight_by_labels=False)
        exp_mse = np.mean(per_list_mse)
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, exp_mse)

    def test_ranking_softmax_loss(self):
        """``RankingSoftmax`` gives the pinned per-list losses and their mean."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        per_list_loss = [14.216904, 30.82149, 12.194349, 0., 0.]

        loss_fn = RankingSoftmax(reduce=False)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, per_list_loss)

        loss_fn = RankingSoftmax(reduce=True)
        exp_loss = np.mean(per_list_loss)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_multi_loss(self):
        """``MultiLoss`` over cross entropy and MSE: plain, reduced and weighted."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        rce = RankingCrossEntropy(normalize=False, reduce=False)
        mse = MeanSquaredError(reduce=False, weight_by_labels=False)

        # Not reduced
        loss_fn = MultiLoss([rce, mse], reduce=False)
        exp_loss = [17.051903, 35.289818, 24.53435, 0.25, 0.36666667]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

        # Reduced
        loss_fn = MultiLoss([rce, mse], reduce=True)
        exp_loss = 15.498548
        act_loss = loss_fn(y_true, y_pred)
        self.assertAlmostEqual(act_loss, exp_loss)

        # With weights
        loss_fn = MultiLoss([rce, mse], [0.5, 2.0], reduce=False)
        exp_loss = [12.778452, 24.34741, 30.777176, 5.75, 0.73333335]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)
class TestLambdaWeights(unittest.TestCase):
    """Check ``DCGLambdaWeight`` against hand-written expected weights.

    Every test passes one list of labels together with its ranks
    (batch size 1) and compares the returned weights with a literal.
    """

    def test_tfr_default(self):
        """Pair weights of a default-constructed ``DCGLambdaWeight``."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()

        exp_weights = [[
            [0.0000000, 0.5, 0.3333333],
            [0.5000000, 0.0, 0.5000000],
            [0.3333333, 0.5, 0.0000000],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_smooth_fraction(self):
        """Pair weights with ``smooth_fraction=1.0``, without and with ``k=1``."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])

        lambda_weight = DCGLambdaWeight(smooth_fraction=1.0)
        exp_weights = [[
            [0., 1. / 2., 2. * 2. / 3.],
            [1. / 2., 0., 1. / 6.],
            [2. * 2. / 3., 1. / 6., 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

        lambda_weight = DCGLambdaWeight(k=1, smooth_fraction=1.0)
        exp_weights = [[
            [0., 1., 2.],
            [1., 0., 0.],
            [2., 0., 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_topn(self):
        """With ``k=1`` only pairs involving the first-ranked item are non-zero."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight(k=1)

        exp_weights = [[
            [0., 1. / 2., 2. * 1. / 2.],
            [1. / 2., 0., 0.],
            [2. * 1. / 2., 0., 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_invalid_labels(self):
        """Pairs involving the negative label are expected to get weight 0."""
        labels = tf.constant([[2.0, 1.0, -1.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()

        exp_weights = [[
            [0., 1. / 2., 0.],
            [1. / 2., 0., 0.],
            [0., 0., 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_gain_and_discount(self):
        """Pair weights with custom ``gain_fn`` and ``discount_fn`` callables."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda x: tf.pow(2.0, x) - 1.0,
            discount_fn=lambda r: 1.0 / tf.math.log1p(r)
        )

        # The single off-diagonal weight; it appears in both (i, j) and (j, i).
        pair_weight = 2. * (1. / math.log(2.) - 1. / math.log(3.))
        exp_weights = [[
            [0., pair_weight],
            [pair_weight, 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_normalized(self):
        """Pair weights with ``normalized=True`` are divided by ``max_dcg``."""
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)

        max_dcg = 2.5
        exp_weights = [[
            [0., 1. / 2. / max_dcg],
            [1. / 2. / max_dcg, 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_individual_weights(self):
        """``individual_weights`` with ``normalized=True``.

        The expected value per item is written as label / ``max_dcg`` / rank.
        """
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)

        max_dcg = 2.5
        exp_weights = [[1. / max_dcg / 1., 2. / max_dcg / 2.]]
        act_weights = lambda_weight.individual_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_create_ndcg_lambda_weight(self):
        """Normalized pair weights with exponential gain and log discount."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda label: tf.pow(2.0, label) - 1.0,
            discount_fn=lambda rank: 1. / tf.math.log1p(rank),
            normalized=True,
            smooth_fraction=0.0
        )

        # (2**2 - 1) / log(1 + 1) + (2**1 - 1) / log(1 + 2), i.e. the gain and
        # discount lambdas above applied to labels [2, 1] at ranks [1, 2].
        max_dcg = 3.0 / math.log(2.) + 1.0 / math.log(3.)
        pair_weight = 2. * (1. / math.log(2.) - 1. / math.log(3.)) / max_dcg
        exp_weights = [[
            [0., pair_weight],
            [pair_weight, 0.],
        ]]
        act_weights = lambda_weight.pair_weights(labels, ranks)

        np.testing.assert_array_almost_equal(act_weights, exp_weights)