"""
Unit tests for the ranking losses and DCG lambda weights imported from ``deletor.losses``.
"""
# Python Modules
import logging
import math
import unittest
# 3rd Party Modules
import numpy as np
import tensorflow as tf
import deletor.tfutils as tfutils
tfutils.disable_gpu()
# Project Modules
from deletor.constants import MIN_FLOAT_32
from deletor.losses import RankingCrossEntropy, masked_softmax, MeanSquaredError, MultiLoss, \
RankingSoftmax, ApproximateNormalizedDiscountedCumulativeGain, DCGLambdaWeight, \
ApproximateBiDiNormalizedDiscountedCumulativeGain
# Print arrays wide and in fixed-point notation so assertion-failure diffs
# show whole rows on one line.
np.set_printoptions(
    precision=8,
    suppress=True,
    edgeitems=20,
    linewidth=10000,
)

# Ask for DEBUG-level logging with timestamp, logger name and line number.
logging.basicConfig(
    format='%(asctime)-15s [%(name)s]:%(lineno)d %(levelname)s %(message)s',
    level=logging.DEBUG,
)
class TestLosses(unittest.TestCase):
    """Tests for the loss functions imported from ``deletor.losses``.

    ``y_true`` and ``y_pred`` are ragged batches of five lists.  Every test
    pads them to dense tensors with ``MIN_FLOAT_32`` before calling a loss.
    """

    # Ragged labels; the row lengths are 4, 6, 2, 3 and 3.
    y_true = tf.ragged.constant([[4., 0., 3., 1.],
                                 [1., 2., 3., 0., 4., 4.],
                                 [0., 4.],
                                 [-1., -2., 3.],
                                 [0., 0., 0.]])
    # Ragged scores with the same row lengths as ``y_true``.
    y_pred = tf.ragged.constant([[3.5, 2.2, 0.5, 1.0],
                                 [1.7, 2.3, 2.1, 1.1, 0.1, 1.0],
                                 [3.8, 0.8],
                                 [1.5, -1.0, 1.0],
                                 [0.1, 1.0, 0.3]])
    # Expected masked softmax of the padded ``y_true``; padded slots are 0.
    exp_softmax = [[0.69638747, 0.01275478, 0.25618663, 0.03467109, 0., 0.],
                   [0.01936247, 0.05263266, 0.14307041, 0.00712306, 0.3889057, 0.3889057],
                   [0.01798621, 0.98201376, 0., 0., 0., 0.],
                   [0.01786798, 0.00657326, 0.9755587, 0., 0., 0.],
                   [0.33333334, 0.33333334, 0.33333334, 0., 0., 0.]]

    def test_masked_softmax(self):
        """masked_softmax of the padded labels matches ``exp_softmax``."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        # 1.0 for real entries, 0.0 for the MIN_FLOAT_32 padding.
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)
        exp_softmax = self.exp_softmax
        act_softmax = masked_softmax(y_true, mask)
        np.testing.assert_array_almost_equal(act_softmax, exp_softmax)

    def test_ranking_cross_entropy(self):
        """RankingCrossEntropy matches -sum(y_true * log(softmax(y_pred)))."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        mask = tf.cast(tf.not_equal(y_true, MIN_FLOAT_32), tf.float32)
        exp_softmax = masked_softmax(y_pred, mask).numpy()
        # Masked-out slots are expected to carry probability 0, so log() gives
        # -inf there.  nan_to_num maps those to 0; errstate only silences the
        # divide-by-zero RuntimeWarning that numpy would otherwise emit.
        with np.errstate(divide='ignore'):
            log_p = np.nan_to_num(np.log(exp_softmax), nan=0.0, posinf=0.0, neginf=0.0)
        # Reference cross entropy per list, shared by the three cases below.
        per_list_loss = -np.sum(y_true.numpy() * log_p, axis=1)

        # Not normalized or reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=False)
        exp_loss = per_list_loss
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

        # Normalized but not reduced
        loss_fn = RankingCrossEntropy(normalize=True, reduce=False)
        # The all-zero label row divides 0 by 0; nan_to_num turns the NaN
        # into 0 and errstate keeps the expected warning out of the output.
        with np.errstate(invalid='ignore'):
            exp_loss = np.nan_to_num(
                per_list_loss / np.sum(mask * np.abs(y_true), axis=1)
            )
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

        # Reduced
        loss_fn = RankingCrossEntropy(normalize=False, reduce=True)
        exp_loss = np.mean(per_list_loss)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss, decimal=5)

    def test_approximate_ndcg(self):
        """Reduced approximate-NDCG loss on the shared fixtures."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)
        # Alternative expectation kept for reference: -0.4411559104919434,
        # which is this value divided by the five lists (presumably the
        # result if the reduction were a mean instead of a sum).
        exp_loss = -2.205779552459717
        act_loss = loss_fn(y_true, y_pred)
        self.assertAlmostEqual(act_loss, exp_loss)

    def test_tfr_approximate_ndcg(self):
        """Reduced approximate-NDCG loss on a small dense 3x3 batch.

        The expected value is labelled "tfr_loss" in the original notes,
        presumably the TensorFlow Ranking result for the same inputs.
        """
        # expected tfr_loss: -1.5697249174118042
        scores = [[1.4, -2.8, -0.4], [0., 1.8, 10.2], [1., 1.2, -3.2]]
        # ranks= [[1, 3, 2], [3, 2, 1], [2, 1, 3]]
        labels = [[0., 2., 1.], [1., 0., 3.], [0., 0., 0.]]
        # weights = [[2.], [1.], [1.]]
        # example_weights = [[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]
        loss_fn = ApproximateNormalizedDiscountedCumulativeGain(reduce=True)
        # TFR uses reduce_sum, but I think it should be reduce mean.
        # Note: reduce_mean seems to work better, but could just be a matter
        # of tuning the learning rate based on the magnitude of the loss.
        exp_loss = -1.5697249174118042
        act_loss = loss_fn(tf.constant(labels), tf.constant(scores))
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_approximate_bidi_ndcg(self):
        """Reduced bidirectional approximate-NDCG loss on the shared fixtures."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        loss_fn = ApproximateBiDiNormalizedDiscountedCumulativeGain(reduce=True)
        act_loss = loss_fn(y_true, y_pred)
        exp_loss = 0.0842013955116272
        self.assertAlmostEqual(act_loss, exp_loss)

    def test_mean_squared_error(self):
        """MeanSquaredError equals the squared error averaged over real entries."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)

        # No reduction
        loss_fn = MeanSquaredError(reduce=False, weight_by_labels=False)
        # Both tensors share the same padding value, so padded slots add 0.
        exp_sqr = np.square(y_true - y_pred)
        exp_sum = np.sum(exp_sqr, axis=1)
        # Number of non-padded entries in each list.
        exp_n = np.sum(y_true > MIN_FLOAT_32, axis=1)
        exp_mse = exp_sum / exp_n
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, exp_mse)

        # With reduction
        loss_fn = MeanSquaredError(reduce=True, weight_by_labels=False)
        exp_mse = np.mean(exp_mse)
        act_mse = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_mse, exp_mse)

    def test_ranking_softmax_loss(self):
        """RankingSoftmax per-list values and their mean when reduced."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        # Expected loss for each of the five lists.
        per_list_loss = [14.216904, 30.82149, 12.194349, 0., 0.]

        loss_fn = RankingSoftmax(reduce=False)
        exp_loss = per_list_loss
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

        loss_fn = RankingSoftmax(reduce=True)
        exp_loss = np.mean(per_list_loss)
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

    def test_multi_loss(self):
        """MultiLoss over cross entropy and MSE: plain, reduced and weighted."""
        y_true = self.y_true.to_tensor(MIN_FLOAT_32)
        y_pred = self.y_pred.to_tensor(MIN_FLOAT_32)
        rce = RankingCrossEntropy(normalize=False, reduce=False)
        mse = MeanSquaredError(reduce=False, weight_by_labels=False)

        # Not reduced
        loss_fn = MultiLoss([rce, mse], reduce=False)
        exp_loss = [17.051903, 35.289818, 24.53435, 0.25, 0.36666667]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)

        # Reduced
        loss_fn = MultiLoss([rce, mse], reduce=True)
        exp_loss = 15.498548
        act_loss = loss_fn(y_true, y_pred)
        self.assertAlmostEqual(act_loss, exp_loss)

        # With weights
        loss_fn = MultiLoss([rce, mse], [0.5, 2.0], reduce=False)
        exp_loss = [12.778452, 24.34741, 30.777176, 5.75, 0.73333335]
        act_loss = loss_fn(y_true, y_pred)
        np.testing.assert_array_almost_equal(act_loss, exp_loss)
class TestLambdaWeights(unittest.TestCase):
    """Tests for ``DCGLambdaWeight`` pair and individual weights.

    Each case uses a single list of labels with ranks ``1..n`` and compares
    the result with hand-written expected values.  The ``tfr`` prefix
    presumably marks cases mirrored from TensorFlow Ranking (to be confirmed).
    """

    def test_tfr_default(self):
        """Pair weights with the default DCGLambdaWeight settings."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()
        exp_weights = [[[0.0000000, 0.5, 0.3333333],
                        [0.5000000, 0.0, 0.5000000],
                        [0.3333333, 0.5, 0.0000000]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_smooth_fraction(self):
        """Pair weights with ``smooth_fraction=1.0``, without and with ``k=1``."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])

        lambda_weight = DCGLambdaWeight(smooth_fraction=1.0)
        exp_weights = [[[0., 1. / 2., 2. * 2. / 3.],
                        [1. / 2., 0., 1. / 6.],
                        [2. * 2. / 3., 1. / 6., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

        lambda_weight = DCGLambdaWeight(k=1, smooth_fraction=1.0)
        exp_weights = [[[0., 1., 2.],
                        [1., 0., 0.],
                        [2., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_topn(self):
        """Pair weights with ``k=1``."""
        labels = tf.constant([[2.0, 1.0, 0.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight(k=1)
        exp_weights = [[[0., 1. / 2., 2. * 1. / 2.],
                        [1. / 2., 0., 0.],
                        [2. * 1. / 2., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_invalid_labels(self):
        """Pairs that involve the negative label are expected to weigh 0."""
        labels = tf.constant([[2.0, 1.0, -1.0]])
        ranks = tf.constant([[1, 2, 3]])
        lambda_weight = DCGLambdaWeight()
        exp_weights = [[[0., 1. / 2., 0.], [1. / 2., 0., 0.], [0., 0., 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_gain_and_discount(self):
        """Pair weights with custom exponential gain and log discount."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda x: tf.pow(2.0, x) - 1.0,
            discount_fn=lambda r: 1.0 / tf.math.log1p(r)
        )
        # Difference between the discounts at ranks 1 and 2.
        discount_gap = 1. / math.log(2.) - 1. / math.log(3.)
        exp_weights = [[[0., 2. * discount_gap],
                        [2. * discount_gap, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_normalized(self):
        """Pair weights with ``normalized=True`` are divided by ``max_dcg``."""
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)
        max_dcg = 2.5
        exp_weights = [[[0., 1. / 2. / max_dcg], [1. / 2. / max_dcg, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_individual_weights(self):
        """Per-item weights from ``individual_weights`` with ``normalized=True``."""
        labels = tf.constant([[1.0, 2.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(normalized=True)
        max_dcg = 2.5
        exp_weights = [[1. / max_dcg / 1., 2. / max_dcg / 2.]]
        act_weights = lambda_weight.individual_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)

    def test_tfr_create_ndcg_lambda_weight(self):
        """Normalized pair weights with exponential gain and log discount."""
        labels = tf.constant([[2.0, 1.0]])
        ranks = tf.constant([[1, 2]])
        lambda_weight = DCGLambdaWeight(
            gain_fn=lambda label: tf.pow(2.0, label) - 1.0,
            discount_fn=lambda rank: 1. / tf.math.log1p(rank),
            normalized=True,
            smooth_fraction=0.0
        )
        # Gains 3 and 1 at ranks 1 and 2, under the gain/discount given above.
        max_dcg = 3.0 / math.log(2.) + 1.0 / math.log(3.)
        # Difference between the discounts at ranks 1 and 2.
        discount_gap = 1. / math.log(2.) - 1. / math.log(3.)
        exp_weights = [[[0., 2. * discount_gap / max_dcg],
                        [2. * discount_gap / max_dcg, 0.]]]
        act_weights = lambda_weight.pair_weights(labels, ranks)
        np.testing.assert_array_almost_equal(act_weights, exp_weights)