Source code for test.random.test_sample

"""
Documentation
"""
#
#
#  Copyright 2020 Reid Swanson
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Python Modules
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf

# Project Modules
import deletor.tfutils as tfutils

from deletor.constants import MIN_FLOAT_32 as PAD
from deletor.random.sample import IndependentSingleOutputSampler, IndependentMultiOutputSampler

# Module-level test configuration; these calls run once, at import time.
# Presumably hides GPUs from TensorFlow so the tests run on CPU -- confirm against tfutils.disable_gpu.
tfutils.disable_gpu()
# Run tf.function-decorated code eagerly so intermediate tensors hold concrete values.
# NOTE(review): newer TensorFlow releases expose this as tf.config.run_functions_eagerly and deprecate the
# experimental alias -- confirm against the TensorFlow version this project pins.
tf.config.experimental_run_functions_eagerly(True)

# Make printed arrays readable in test output: 6 decimal digits, no scientific notation for small values,
# up to 200 edge items per dimension when an array is summarized, and lines wide enough to avoid wrapping.
np.set_printoptions(precision=6, suppress=True, edgeitems=200, linewidth=1000000)


class TestSampleBeforeBatching(unittest.TestCase):
    """Tests for samplers applied to a single example, i.e. before batching.

    The fixtures are ragged: the three entries of ``x`` hold 4, 5 and 2 two-element vectors respectively, and
    ``y`` holds one value per vector.
    """

    x = [[[0.0,  1.], [2.0,  3.], [4.0,  5.], [6.0,  7.]],
         [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
         [[20., 21.], [22., 23.]]]

    y = [[0.0,  1.,  2.,  3.],
         [5.0,  6.,  7.,  8.,  9.],
         [10., 11.]]

    @unittest.skip("The test seems to be broken, but sample before batching is not used right now")
    def test_independent_multi_output(self):
        """Sample from the first and the last fixture entry with fixed ``random_values``.

        Each case builds an ``IndependentMultiOutputSampler`` with ``sample_pre_batch=True`` and compares the
        returned ``'sequence_dense'`` entry and ``y[1]`` against hand-written expectations.
        """
        sample_size = 3

        # Case 1: the first fixture entry (4 vectors), three rows of random values.
        x0 = {'sequence_dense': tf.constant(self.x[0], tf.float32)}
        y0 = tf.constant(self.y[0], tf.float32)
        r0 = tf.constant([[3, 2, 1, 0],
                          [2, 1, 0, 3],
                          [3, 2, 0, 1]])

        sampler = IndependentMultiOutputSampler(sample_size, multiple=1, sample_pre_batch=True)
        x, y = sampler(x0, y0, random_values=r0)

        exp_x_sample = [[[0., 1.], [2., 3.], [4., 5.]],
                        [[6., 7.], [0., 1.], [2., 3.]],
                        [[0., 1.], [2., 3.], [6., 7.]]]
        exp_y_sample = [[0., 1., 2.],
                        [3., 0., 1.],
                        [0., 1., 3.]]
        # Only used by the disabled 'document_counts' assertion below; kept for reference.
        exp_document_counts = [3., 3., 1., 2.]

        act_x_sample = x['sequence_dense']
        act_y_sample = y[1]
        # act_document_counts = x['document_counts']

        # Debug aid: the label and the printed value now refer to the same tensor.
        print(f"act_x_sample:\n{act_x_sample}")
        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # np.testing.assert_array_equal(act_document_counts, exp_document_counts)

        # Case 2: the last fixture entry, which has fewer vectors (2) than sample_size (3).
        x2 = {'sequence_dense': tf.constant(self.x[2], tf.float32)}
        y2 = tf.constant(self.y[2], tf.float32)
        r2 = tf.constant([[0, 1],
                          [1, 0]])

        sampler = IndependentMultiOutputSampler(sample_size, multiple=1, sample_pre_batch=True)
        x, y = sampler(x2, y2, random_values=r2)

        exp_x_sample = [[[22., 23.], [20., 21.]],
                        [[20., 21.], [22., 23.]]]
        exp_y_sample = [[11., 10.],
                        [10., 11.]]
        # Only used by the disabled 'document_counts' assertion below; kept for reference.
        exp_document_counts = [2., 2.]

        act_x_sample = x['sequence_dense']
        act_y_sample = y[1]
        # act_document_counts = x['document_counts']

        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # np.testing.assert_array_equal(act_document_counts, exp_document_counts)


class TestSampleAfterBatching(unittest.TestCase):
    """Tests for samplers applied to an already padded batch (``sample_pre_batch=False``).

    The batch holds 3 examples of 5 two-element vectors each. Short examples are padded with zero vectors in
    ``x`` and with ``PAD`` in ``y``.
    """

    # Padded inputs: examples 0 and 2 end with one and three all-zero vectors respectively.
    x = [[[0.0,  1.], [2.0,  3.], [4.0,  5.], [6.0,  7.], [0.0,  0.]],
         [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
         [[20., 21.], [22., 23.], [0.0,  0.], [0.0,  0.], [0.0,  0.]]]

    # Targets aligned with ``x``; padded positions hold PAD.
    y = [[0.0,  1.,  2.,  3.,  PAD],
         [5.0,  6.,  7.,  8.,  9.],
         [10., 11., PAD, PAD, PAD]]

    # Fixed values handed to the samplers as ``random_values`` (5 rows of 5 values per example).
    r = [[[99., 15., 19., 66.,  0.],
          [37., 54., 33., 44.,  0.],
          [48., 57., 24., 46.,  0.],
          [43., 82., 41., 68.,  0.],
          [28., 14., 53., 58.,  0.]],
         [[97., 31., 93., 58., 32.],
          [45., 10., 11., 98., 90.],
          [70., 48., 73., 36., 74.],
          [10., 40., 90., 41., 14.],
          [30., 20., 33., 12., 38.]],
         [[33., 42., 76.,  0.,  0.],
          [92., 98., 78.,  0.,  0.],
          [57., 55., 51.,  0.,  0.],
          [53., 70., 83.,  0.,  0.],
          [56., 62., 35.,  0.,  0.]]]

    # Row-wise descending argsort of ``r``. Not referenced by the tests in this class.
    a = [[[0, 3, 2, 1, 4],
          [1, 3, 0, 2, 4],
          [1, 0, 3, 2, 4],
          [1, 3, 0, 2, 4],
          [3, 2, 0, 1, 4]],
         [[0, 2, 3, 4, 1],
          [3, 4, 0, 2, 1],
          [4, 2, 0, 1, 3],
          [2, 3, 1, 4, 0],
          [4, 2, 0, 1, 3]],
         [[2, 1, 0, 3, 4],
          [1, 0, 2, 3, 4],
          [0, 1, 2, 3, 4],
          [2, 1, 0, 3, 4],
          [1, 0, 2, 3, 4]]]

    # Expected 'sample_dense' output, shared by both tests: 5 samples of 3 vectors per example.
    exp_x_sample = [[[[0.0,  1.], [6.0,  7.], [4.0,  5.]],
                     [[2.0,  3.], [6.0,  7.], [0.0,  1.]],
                     [[2.0,  3.], [0.0,  1.], [6.0,  7.]],
                     [[2.0,  3.], [6.0,  7.], [0.0,  1.]],
                     [[6.0,  7.], [4.0,  5.], [0.0,  1.]]],
                    [[[10., 11.], [14., 15.], [16., 17.]],
                     [[16., 17.], [18., 19.], [10., 11.]],
                     [[18., 19.], [14., 15.], [10., 11.]],
                     [[14., 15.], [16., 17.], [12., 13.]],
                     [[18., 19.], [14., 15.], [10., 11.]]],
                    [[[22., 23.], [20., 21.], [0.0,  0.]],
                     [[22., 23.], [20., 21.], [0.0,  0.]],
                     [[20., 21.], [22., 23.], [0.0,  0.]],
                     [[22., 23.], [20., 21.], [0.0,  0.]],
                     [[22., 23.], [20., 21.], [0.0,  0.]]]]

    # Expected ``y[1]`` output, shared by both tests.
    exp_y_sample = [[[0, 3, 2], [1, 3, 0], [1, 0, 3], [1, 3, 0], [3, 2, 0]],
                    [[5, 7, 8], [8, 9, 5], [9, 7, 5], [7, 8, 6], [9, 7, 5]],
                    [[11, 10, PAD], [11, 10, PAD], [10, 11, PAD], [11, 10, PAD], [11, 10, PAD]]]

    def _run_sampler(self, sampler_cls, sample_size):
        """Build ``sampler_cls`` for post-batch sampling and apply it to the class fixtures."""
        inputs = {'sequence_dense': tf.constant(self.x)}
        targets = tf.constant(self.y)
        noise = tf.constant(self.r)

        sampler = sampler_cls(sample_size, multiple=1, sample_pre_batch=False)
        return sampler(inputs, targets, random_values=noise)

    # noinspection DuplicatedCode
    def test_independent_multi_output(self):
        """Check the multi-output sampler's samples, scatter indices, counts and a scatter round trip."""
        batch_size, sample_size = 3, 3
        n_documents = n_samples = 5

        sampled_x, sampled_y = self._run_sampler(IndependentMultiOutputSampler, sample_size)

        expected_scatter = [[[[0, 0], [0, 3], [0, 2]],
                             [[0, 1], [0, 3], [0, 0]],
                             [[0, 1], [0, 0], [0, 3]],
                             [[0, 1], [0, 3], [0, 0]],
                             [[0, 3], [0, 2], [0, 0]]],
                            [[[1, 0], [1, 2], [1, 3]],
                             [[1, 3], [1, 4], [1, 0]],
                             [[1, 4], [1, 2], [1, 0]],
                             [[1, 2], [1, 3], [1, 1]],
                             [[1, 4], [1, 2], [1, 0]]],
                            [[[2, 1], [2, 0], [2, 2]],
                             [[2, 1], [2, 0], [2, 2]],
                             [[2, 0], [2, 1], [2, 2]],
                             [[2, 1], [2, 0], [2, 2]],
                             [[2, 1], [2, 0], [2, 2]]]]
        expected_counts = [[5., 3., 2., 5., 0.],
                           [4., 1., 4., 3., 3.],
                           [5., 5., 5., 0., 0.]]

        # Read every output before asserting, so a missing key surfaces ahead of any comparison.
        got_x = sampled_x['sample_dense']
        got_y = sampled_y[1]
        got_scatter = sampled_x['scatter_idx']
        got_counts = sampled_x['document_counts']

        np.testing.assert_array_equal(got_x, self.exp_x_sample)
        np.testing.assert_array_equal(got_y, self.exp_y_sample)
        np.testing.assert_array_equal(got_scatter, expected_scatter)
        np.testing.assert_array_equal(got_counts, expected_counts)

        # Stand-in for network output: consecutive integers, one per sampled position. Scattering them back
        # sums the values that land on the same (example, document) cell.
        n_outputs = batch_size * n_samples * sample_size
        fake_output = tf.reshape(tf.range(n_outputs), (batch_size, n_samples, sample_size))
        expected_pred = [[37,   18,  15,  35,   0],
                         [87,   26,  90,  60,  67],
                         [184, 181, 190,   0,   0]]
        got_pred = tf.scatter_nd(sampled_x['scatter_idx'], fake_output, (batch_size, n_documents))
        np.testing.assert_array_equal(got_pred, expected_pred)

    # noinspection DuplicatedCode
    def test_independent_single_output(self):
        """Check the single-output sampler's samples, scatter indices, counts and a scatter round trip."""
        batch_size, sample_size = 3, 3
        n_documents = n_samples = 5

        sampled_x, sampled_y = self._run_sampler(IndependentSingleOutputSampler, sample_size)

        # One scatter index per sample here, versus one per sampled position in the multi-output test.
        expected_scatter = [[[[0, 0]], [[0, 1]], [[0, 1]], [[0, 1]], [[0, 3]]],
                            [[[1, 0]], [[1, 3]], [[1, 4]], [[1, 2]], [[1, 4]]],
                            [[[2, 1]], [[2, 1]], [[2, 0]], [[2, 1]], [[2, 1]]]]
        expected_counts = [[1, 3, 0, 1, 0],
                           [1, 0, 1, 1, 2],
                           [1, 4, 0, 0, 0]]

        # Read every output before asserting, so a missing key surfaces ahead of any comparison.
        got_x = sampled_x['sample_dense']
        got_y = sampled_y[1]
        got_scatter = sampled_x['scatter_idx']
        got_counts = sampled_x['document_counts']

        np.testing.assert_array_equal(got_x, self.exp_x_sample)
        np.testing.assert_array_equal(got_y, self.exp_y_sample)
        np.testing.assert_array_equal(got_scatter, expected_scatter)
        np.testing.assert_array_equal(got_counts, expected_counts)

        # Stand-in for network output: one consecutive integer per sample, summed per (example, document) cell.
        fake_output = tf.reshape(tf.range(batch_size * n_samples), (batch_size, n_samples, 1))
        expected_pred = [[0,   6,  0,  4,  0],
                         [5,   0,  8,  6, 16],
                         [12, 48,  0,  0,  0]]
        got_pred = tf.scatter_nd(sampled_x['scatter_idx'], fake_output, (batch_size, n_documents))
        np.testing.assert_array_equal(got_pred, expected_pred)