# Source code for test.random.test_sample

"""
Documentation
"""
#
#
#  Copyright 2020 Reid Swanson
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#  http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#

# Python Modules
import unittest

# 3rd Party Modules
import numpy as np
import tensorflow as tf

# Project Modules
import deletor.tfutils as tfutils

from deletor.constants import MIN_FLOAT_32 as PAD
from deletor.random.sample import IndependentSingleOutputSampler, IndependentMultiOutputSampler

# Module-level test configuration; runs once when this module is imported.
# NOTE(review): disable_gpu is a project helper -- presumably it hides GPUs from
# TensorFlow so the tests run on CPU; confirm in deletor.tfutils.
tfutils.disable_gpu()
# Ask TensorFlow to run tf.function-wrapped code eagerly.
# NOTE(review): later TensorFlow releases deprecate this experimental_ name in
# favor of tf.config.run_functions_eagerly -- confirm against the pinned version
# before switching.
tf.config.experimental_run_functions_eagerly(True)

# Make printed arrays readable when debugging: fixed 6-digit precision, no
# scientific notation for small values, and limits high enough that the arrays
# in this file are neither summarized nor wrapped.
np.set_printoptions(precision=6, suppress=True, edgeitems=200, linewidth=1000000)


class TestSampleBeforeBatching(unittest.TestCase):
    """Tests for sampling applied to a single, un-padded example (``sample_pre_batch=True``).

    The fixtures are ragged: the three examples hold 4, 5 and 2 documents
    respectively, and no padding is applied.
    """

    # Per-example document rows, two values per document.
    x = [
        [[0.0, 1.], [2.0, 3.], [4.0, 5.], [6.0, 7.]],
        [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
        [[20., 21.], [22., 23.]],
    ]
    # One target value per document row in ``x``.
    y = [
        [0.0, 1., 2., 3.],
        [5.0, 6., 7., 8., 9.],
        [10., 11.],
    ]

    def _sample(self, index, random_values, sample_size):
        """Run a fresh ``IndependentMultiOutputSampler`` on fixture example ``index``.

        :param index: Position of the example in ``self.x`` / ``self.y``.
        :param random_values: Nested list handed to the sampler as ``random_values``.
        :param sample_size: First positional argument of the sampler constructor.
        :return: The pair ``(x['sequence_dense'], y[1])`` taken from the sampler output.
        """
        features = {'sequence_dense': tf.constant(self.x[index], tf.float32)}
        targets = tf.constant(self.y[index], tf.float32)
        sampler = IndependentMultiOutputSampler(sample_size, multiple=1, sample_pre_batch=True)
        x, y = sampler(features, targets, random_values=tf.constant(random_values))

        return x['sequence_dense'], y[1]

    @unittest.skip(
        "The test seems to be broken, "
        "but sample before batching is not used right now"
    )
    def test_independent_multi_output(self):
        """Sample examples 0 and 2 with fixed random values and compare with hand-written output."""
        sample_size = 3

        # Example 0: four documents.
        act_x_sample, act_y_sample = self._sample(
            0, [[3, 2, 1, 0], [2, 1, 0, 3], [3, 2, 0, 1]], sample_size
        )
        exp_x_sample = [
            [[0., 1.], [2., 3.], [4., 5.]],
            [[6., 7.], [0., 1.], [2., 3.]],
            [[0., 1.], [2., 3.], [6., 7.]],
        ]
        exp_y_sample = [[0., 1., 2.], [3., 0., 1.], [0., 1., 3.]]

        print(f"act_y_sample:\n{act_y_sample}")

        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # TODO: the check of x['document_counts'] against [3., 3., 1., 2.] is
        #  disabled; restore it when this test is repaired.

        # Example 2: only two documents, fewer than sample_size.
        act_x_sample, act_y_sample = self._sample(2, [[0, 1], [1, 0]], sample_size)
        exp_x_sample = [
            [[22., 23.], [20., 21.]],
            [[20., 21.], [22., 23.]],
        ]
        exp_y_sample = [[11., 10.], [10., 11.]]

        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # TODO: the check of x['document_counts'] against [2., 2.] is disabled;
        #  restore it when this test is repaired.
class TestSampleAfterBatching(unittest.TestCase):
    """Tests for sampling applied to an already padded batch (``sample_pre_batch=False``).

    The batch holds three examples padded to five documents each. Padded
    document rows in ``x`` are ``[0.0, 0.0]`` and padded targets in ``y`` are
    ``PAD`` (``MIN_FLOAT_32`` from ``deletor.constants``).
    """

    # Batch of 3 examples x 5 documents x 2 values; examples 0 and 2 are padded.
    x = [
        [[0.0, 1.], [2.0, 3.], [4.0, 5.], [6.0, 7.], [0.0, 0.]],
        [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
        [[20., 21.], [22., 23.], [0.0, 0.], [0.0, 0.], [0.0, 0.]],
    ]
    # One target per document; PAD marks the padded positions.
    y = [
        [0.0, 1., 2., 3., PAD],
        [5.0, 6., 7., 8., 9.],
        [10., 11., PAD, PAD, PAD],
    ]
    # Fixed values passed to the samplers as ``random_values`` (3 x 5 x 5) so
    # that the sampling outcome is deterministic.
    r = [
        [[99., 15., 19., 66., 0.],
         [37., 54., 33., 44., 0.],
         [48., 57., 24., 46., 0.],
         [43., 82., 41., 68., 0.],
         [28., 14., 53., 58., 0.]],
        [[97., 31., 93., 58., 32.],
         [45., 10., 11., 98., 90.],
         [70., 48., 73., 36., 74.],
         [10., 40., 90., 41., 14.],
         [30., 20., 33., 12., 38.]],
        [[33., 42., 76., 0., 0.],
         [92., 98., 78., 0., 0.],
         [57., 55., 51., 0., 0.],
         [53., 70., 83., 0., 0.],
         [56., 62., 35., 0., 0.]],
    ]
    # Descending argsort of each row of ``r``. Not referenced by the tests
    # below; kept as a reference for how the expected samples were derived.
    a = [
        [[0, 3, 2, 1, 4], [1, 3, 0, 2, 4], [1, 0, 3, 2, 4], [1, 3, 0, 2, 4], [3, 2, 0, 1, 4]],
        [[0, 2, 3, 4, 1], [3, 4, 0, 2, 1], [4, 2, 0, 1, 3], [2, 3, 1, 4, 0], [4, 2, 0, 1, 3]],
        [[2, 1, 0, 3, 4], [1, 0, 2, 3, 4], [0, 1, 2, 3, 4], [2, 1, 0, 3, 4], [1, 0, 2, 3, 4]],
    ]
    # Expected x['sample_dense']: 3 examples x 5 samples x 3 documents x 2 values.
    exp_x_sample = [
        [[[0.0, 1.], [6.0, 7.], [4.0, 5.]],
         [[2.0, 3.], [6.0, 7.], [0.0, 1.]],
         [[2.0, 3.], [0.0, 1.], [6.0, 7.]],
         [[2.0, 3.], [6.0, 7.], [0.0, 1.]],
         [[6.0, 7.], [4.0, 5.], [0.0, 1.]]],
        [[[10., 11.], [14., 15.], [16., 17.]],
         [[16., 17.], [18., 19.], [10., 11.]],
         [[18., 19.], [14., 15.], [10., 11.]],
         [[14., 15.], [16., 17.], [12., 13.]],
         [[18., 19.], [14., 15.], [10., 11.]]],
        [[[22., 23.], [20., 21.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]],
         [[20., 21.], [22., 23.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]]],
    ]
    # Expected y[1]: the targets of the documents selected in exp_x_sample.
    exp_y_sample = [
        [[0, 3, 2], [1, 3, 0], [1, 0, 3], [1, 3, 0], [3, 2, 0]],
        [[5, 7, 8], [8, 9, 5], [9, 7, 5], [7, 8, 6], [9, 7, 5]],
        [[11, 10, PAD], [11, 10, PAD], [10, 11, PAD], [11, 10, PAD], [11, 10, PAD]],
    ]

    def _run_sampler(self, sampler_cls, sample_size):
        """Build ``sampler_cls`` for post-batch sampling and apply it to the class fixtures.

        :param sampler_cls: Sampler class to instantiate.
        :param sample_size: First positional argument of the sampler constructor.
        :return: The ``(x, y)`` pair returned by the sampler.
        """
        features = {'sequence_dense': tf.constant(self.x)}
        targets = tf.constant(self.y)
        random_values = tf.constant(self.r)
        sampler = sampler_cls(sample_size, multiple=1, sample_pre_batch=False)

        return sampler(features, targets, random_values=random_values)

    def _assert_sampler_output(self, x, y, exp_scatter_idx, exp_counts):
        """Compare the sampler output with the shared and test-specific expectations.

        :param x: Feature dictionary returned by the sampler.
        :param y: Target structure returned by the sampler; element 1 is checked.
        :param exp_scatter_idx: Expected value of ``x['scatter_idx']``.
        :param exp_counts: Expected value of ``x['document_counts']``.
        """
        # Look everything up first so that a missing key surfaces before any comparison.
        act_x_sample = x['sample_dense']
        act_y_sample = y[1]
        act_scatter_idx = x['scatter_idx']
        act_counts = x['document_counts']

        np.testing.assert_array_equal(act_x_sample, self.exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, self.exp_y_sample)
        np.testing.assert_array_equal(act_scatter_idx, exp_scatter_idx)
        np.testing.assert_array_equal(act_counts, exp_counts)

    def test_independent_multi_output(self):
        """Multi-output sampler: one scatter index per sampled document."""
        batch_size = 3
        n_documents = n_samples = 5
        sample_size = 3

        x, y = self._run_sampler(IndependentMultiOutputSampler, sample_size)

        # [example, document] index pairs for every sampled document.
        exp_scatter_idx = [
            [[[0, 0], [0, 3], [0, 2]],
             [[0, 1], [0, 3], [0, 0]],
             [[0, 1], [0, 0], [0, 3]],
             [[0, 1], [0, 3], [0, 0]],
             [[0, 3], [0, 2], [0, 0]]],
            [[[1, 0], [1, 2], [1, 3]],
             [[1, 3], [1, 4], [1, 0]],
             [[1, 4], [1, 2], [1, 0]],
             [[1, 2], [1, 3], [1, 1]],
             [[1, 4], [1, 2], [1, 0]]],
            [[[2, 1], [2, 0], [2, 2]],
             [[2, 1], [2, 0], [2, 2]],
             [[2, 0], [2, 1], [2, 2]],
             [[2, 1], [2, 0], [2, 2]],
             [[2, 1], [2, 0], [2, 2]]],
        ]
        # Number of times each document appears in exp_scatter_idx.
        exp_counts = [
            [5., 3., 2., 5., 0.],
            [4., 1., 4., 3., 3.],
            [5., 5., 5., 0., 0.],
        ]
        self._assert_sampler_output(x, y, exp_scatter_idx, exp_counts)

        # Simulate some expected fake neural net output data.
        updates = tf.reshape(
            tf.range(batch_size * n_samples * sample_size),
            (batch_size, n_samples, sample_size)
        )
        # scatter_nd sums the updates that land on the same document.
        exp_pred = [
            [37, 18, 15, 35, 0],
            [87, 26, 90, 60, 67],
            [184, 181, 190, 0, 0],
        ]
        act_pred = tf.scatter_nd(x['scatter_idx'], updates, (batch_size, n_documents))

        np.testing.assert_array_equal(act_pred, exp_pred)

    def test_independent_single_output(self):
        """Single-output sampler: one scatter index per sample."""
        batch_size = 3
        n_documents = n_samples = 5
        sample_size = 3

        x, y = self._run_sampler(IndependentSingleOutputSampler, sample_size)

        # A single [example, document] index pair per sample.
        exp_scatter_idx = [
            [[[0, 0]], [[0, 1]], [[0, 1]], [[0, 1]], [[0, 3]]],
            [[[1, 0]], [[1, 3]], [[1, 4]], [[1, 2]], [[1, 4]]],
            [[[2, 1]], [[2, 1]], [[2, 0]], [[2, 1]], [[2, 1]]],
        ]
        # Number of times each document appears in exp_scatter_idx.
        exp_counts = [
            [1, 3, 0, 1, 0],
            [1, 0, 1, 1, 2],
            [1, 4, 0, 0, 0],
        ]
        self._assert_sampler_output(x, y, exp_scatter_idx, exp_counts)

        # Simulate some expected fake neural net output data.
        updates = tf.reshape(tf.range(batch_size * n_samples), (batch_size, n_samples, 1))
        # scatter_nd sums the updates that land on the same document.
        exp_pred = [
            [0, 6, 0, 4, 0],
            [5, 0, 8, 6, 16],
            [12, 48, 0, 0, 0],
        ]
        act_pred = tf.scatter_nd(x['scatter_idx'], updates, (batch_size, n_documents))

        np.testing.assert_array_equal(act_pred, exp_pred)