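"""
Unit tests for the independent single- and multi-output samplers in
``deletor.random.sample``, covering sampling before and after batching.
"""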
#
#
# Copyright 2020 Reid Swanson
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Python Modules
import unittest
# 3rd Party Modules
import numpy as np
import tensorflow as tf
# Project Modules
import deletor.tfutils as tfutils
from deletor.constants import MIN_FLOAT_32 as PAD
from deletor.random.sample import IndependentSingleOutputSampler, IndependentMultiOutputSampler
# Module-wide test setup; runs once at import time, before any test class is defined.
# Presumably hides the GPU from TensorFlow so the tests run on CPU -- confirm against deletor.tfutils.
tfutils.disable_gpu()
# Run tf.function-decorated code eagerly so tensors inside the samplers hold concrete values.
# NOTE(review): newer TensorFlow releases expose this as tf.config.run_functions_eagerly -- confirm
# which TensorFlow version this project targets before switching.
tf.config.experimental_run_functions_eagerly(True)
# Print arrays without scientific notation and effectively without truncation or line wrapping,
# so that array dumps in failure output stay readable.
np.set_printoptions(precision=6, suppress=True, edgeitems=200, linewidth=1000000)
class TestSampleBeforeBatching(unittest.TestCase):
    """Exercise sampling of single, unbatched examples (``sample_pre_batch=True``).

    The fixtures are three unpadded examples holding 4, 5 and 2 documents. A
    test passes one example together with fixed ``random_values`` to the
    sampler (presumably replacing the sampler's own random draw -- confirm
    against ``deletor.random.sample``) so the expected output can be written
    down by hand.
    """

    # Per-example document features; every document is a 2-element vector.
    x = [[[0.0, 1.], [2.0, 3.], [4.0, 5.], [6.0, 7.]],
         [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
         [[20., 21.], [22., 23.]]]
    # One target value per document in ``x``.
    y = [[0.0, 1., 2., 3.],
         [5.0, 6., 7., 8., 9.],
         [10., 11.]]

    @unittest.skip("The test seems to be broken, but sample before batching is not used right now")
    def test_independent_multi_output(self):
        """Check the multi-output sampler on the 4-document and 2-document examples.

        In both cases the expected samples list, for each row of the random
        values, the documents ordered by descending random value, truncated to
        at most ``sample_size`` entries.
        """
        sample_size = 3

        # --- Example 0: four documents, three draws of three documents each. ---
        x0 = {'sequence_dense': tf.constant(self.x[0], tf.float32)}
        y0 = tf.constant(self.y[0], tf.float32)
        r0 = tf.constant([[3, 2, 1, 0],
                          [2, 1, 0, 3],
                          [3, 2, 0, 1]])

        sampler = IndependentMultiOutputSampler(sample_size, multiple=1, sample_pre_batch=True)
        x, y = sampler(x0, y0, random_values=r0)

        exp_x_sample = [[[0., 1.], [2., 3.], [4., 5.]],
                        [[6., 7.], [0., 1.], [2., 3.]],
                        [[0., 1.], [2., 3.], [6., 7.]]]
        exp_y_sample = [[0., 1., 2.],
                        [3., 0., 1.],
                        [0., 1., 3.]]

        act_x_sample = x['sequence_dense']
        act_y_sample = y[1]

        # Debugging aid while the test is skipped; the label now names the value actually printed.
        print(f"act_y_sample:\n{act_y_sample}")

        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # TODO: once the pre-batch sampler is repaired, also verify that
        # x['document_counts'] equals [3., 3., 1., 2.].

        # --- Example 2: only two documents, fewer than ``sample_size``. ---
        x2 = {'sequence_dense': tf.constant(self.x[2], tf.float32)}
        y2 = tf.constant(self.y[2], tf.float32)
        r2 = tf.constant([[0, 1],
                          [1, 0]])

        sampler = IndependentMultiOutputSampler(sample_size, multiple=1, sample_pre_batch=True)
        x, y = sampler(x2, y2, random_values=r2)

        exp_x_sample = [[[22., 23.], [20., 21.]],
                        [[20., 21.], [22., 23.]]]
        exp_y_sample = [[11., 10.],
                        [10., 11.]]

        act_x_sample = x['sequence_dense']
        act_y_sample = y[1]

        np.testing.assert_array_equal(act_x_sample, exp_x_sample)
        np.testing.assert_array_equal(act_y_sample, exp_y_sample)
        # TODO: once the pre-batch sampler is repaired, also verify that
        # x['document_counts'] equals [2., 2.].
class TestSampleAfterBatching(unittest.TestCase):
    """Exercise the samplers on an already padded batch (``sample_pre_batch=False``).

    The batch holds three examples padded to five documents each. Fixed
    ``random_values`` are passed to the sampler so that the expected samples,
    scatter indices and per-document counts can be stated exactly.
    """

    # Document features, padded with all-zero rows to five documents per example.
    x = [
        [[0.0, 1.], [2.0, 3.], [4.0, 5.], [6.0, 7.], [0.0, 0.]],
        [[10., 11.], [12., 13.], [14., 15.], [16., 17.], [18., 19.]],
        [[20., 21.], [22., 23.], [0.0, 0.], [0.0, 0.], [0.0, 0.]],
    ]

    # One target per document; padded positions hold PAD.
    y = [
        [0.0, 1., 2., 3., PAD],
        [5.0, 6., 7., 8., 9.],
        [10., 11., PAD, PAD, PAD],
    ]

    # Fixed "random" values handed to the sampler: one row of five values per draw,
    # five draws per example.
    r = [
        [[99., 15., 19., 66., 0.],
         [37., 54., 33., 44., 0.],
         [48., 57., 24., 46., 0.],
         [43., 82., 41., 68., 0.],
         [28., 14., 53., 58., 0.]],
        [[97., 31., 93., 58., 32.],
         [45., 10., 11., 98., 90.],
         [70., 48., 73., 36., 74.],
         [10., 40., 90., 41., 14.],
         [30., 20., 33., 12., 38.]],
        [[33., 42., 76., 0., 0.],
         [92., 98., 78., 0., 0.],
         [57., 55., 51., 0., 0.],
         [53., 70., 83., 0., 0.],
         [56., 62., 35., 0., 0.]],
    ]

    # Descending argsort of ``r`` along the last axis. Not referenced by the tests below;
    # kept as the hand-computed ordering behind the expected samples.
    a = [
        [[0, 3, 2, 1, 4],
         [1, 3, 0, 2, 4],
         [1, 0, 3, 2, 4],
         [1, 3, 0, 2, 4],
         [3, 2, 0, 1, 4]],
        [[0, 2, 3, 4, 1],
         [3, 4, 0, 2, 1],
         [4, 2, 0, 1, 3],
         [2, 3, 1, 4, 0],
         [4, 2, 0, 1, 3]],
        [[2, 1, 0, 3, 4],
         [1, 0, 2, 3, 4],
         [0, 1, 2, 3, 4],
         [2, 1, 0, 3, 4],
         [1, 0, 2, 3, 4]],
    ]

    # Rows of ``x`` picked by the first three indices of every row of ``a``.
    exp_x_sample = [
        [[[0.0, 1.], [6.0, 7.], [4.0, 5.]],
         [[2.0, 3.], [6.0, 7.], [0.0, 1.]],
         [[2.0, 3.], [0.0, 1.], [6.0, 7.]],
         [[2.0, 3.], [6.0, 7.], [0.0, 1.]],
         [[6.0, 7.], [4.0, 5.], [0.0, 1.]]],
        [[[10., 11.], [14., 15.], [16., 17.]],
         [[16., 17.], [18., 19.], [10., 11.]],
         [[18., 19.], [14., 15.], [10., 11.]],
         [[14., 15.], [16., 17.], [12., 13.]],
         [[18., 19.], [14., 15.], [10., 11.]]],
        [[[22., 23.], [20., 21.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]],
         [[20., 21.], [22., 23.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]],
         [[22., 23.], [20., 21.], [0.0, 0.]]],
    ]

    # Entries of ``y`` picked by the same indices.
    exp_y_sample = [
        [[0, 3, 2], [1, 3, 0], [1, 0, 3], [1, 3, 0], [3, 2, 0]],
        [[5, 7, 8], [8, 9, 5], [9, 7, 5], [7, 8, 6], [9, 7, 5]],
        [[11, 10, PAD], [11, 10, PAD], [10, 11, PAD], [11, 10, PAD], [11, 10, PAD]],
    ]

    def _run_sampler(self, sampler_type, sample_size):
        """Build ``sampler_type`` and apply it to the class fixtures.

        :param sampler_type: The sampler class under test.
        :param sample_size: The sample size passed to the sampler constructor.
        :return: The ``(x, y)`` pair returned by the sampler.
        """
        features = {'sequence_dense': tf.constant(self.x)}
        targets = tf.constant(self.y)
        random_values = tf.constant(self.r)

        sampler = sampler_type(sample_size, multiple=1, sample_pre_batch=False)

        return sampler(features, targets, random_values=random_values)

    def _assert_sampler_output(self, features, targets, exp_scatter_idx, exp_counts):
        """Compare the sampler output with the class fixtures and the given expectations.

        :param features: The feature dictionary returned by the sampler.
        :param targets: The target structure returned by the sampler.
        :param exp_scatter_idx: The expected value of ``features['scatter_idx']``.
        :param exp_counts: The expected value of ``features['document_counts']``.
        """
        # Look everything up before comparing, so a missing entry fails ahead of any assertion.
        checks = (
            (features['sample_dense'], self.exp_x_sample),
            (targets[1], self.exp_y_sample),
            (features['scatter_idx'], exp_scatter_idx),
            (features['document_counts'], exp_counts),
        )

        for actual, expected in checks:
            np.testing.assert_array_equal(actual, expected)

    @staticmethod
    def _assert_scatter(scatter_idx, updates, exp_pred, shape):
        """Scatter ``updates`` into a tensor of ``shape`` and compare it with ``exp_pred``."""
        act_pred = tf.scatter_nd(scatter_idx, updates, shape)
        np.testing.assert_array_equal(act_pred, exp_pred)

    def test_independent_multi_output(self):
        """Multi-output sampler: every sampled document has its own scatter index."""
        batch_size, sample_size = 3, 3
        n_documents = n_samples = 5

        features, targets = self._run_sampler(IndependentMultiOutputSampler, sample_size)

        # (example, document) destination for each of the three slots of every draw.
        exp_scatter_idx = [
            [[[0, 0], [0, 3], [0, 2]],
             [[0, 1], [0, 3], [0, 0]],
             [[0, 1], [0, 0], [0, 3]],
             [[0, 1], [0, 3], [0, 0]],
             [[0, 3], [0, 2], [0, 0]]],
            [[[1, 0], [1, 2], [1, 3]],
             [[1, 3], [1, 4], [1, 0]],
             [[1, 4], [1, 2], [1, 0]],
             [[1, 2], [1, 3], [1, 1]],
             [[1, 4], [1, 2], [1, 0]]],
            [[[2, 1], [2, 0], [2, 2]],
             [[2, 1], [2, 0], [2, 2]],
             [[2, 0], [2, 1], [2, 2]],
             [[2, 1], [2, 0], [2, 2]],
             [[2, 1], [2, 0], [2, 2]]],
        ]
        # How often each document index occurs in ``exp_scatter_idx``, per example.
        exp_counts = [
            [5., 3., 2., 5., 0.],
            [4., 1., 4., 3., 3.],
            [5., 5., 5., 0., 0.],
        ]

        self._assert_sampler_output(features, targets, exp_scatter_idx, exp_counts)

        # Stand-in for network output: one distinct integer per sampled slot.
        n_updates = batch_size * n_samples * sample_size
        updates = tf.reshape(tf.range(n_updates), (batch_size, n_samples, sample_size))

        # Updates that land on the same document are summed by the scatter.
        exp_pred = [
            [37, 18, 15, 35, 0],
            [87, 26, 90, 60, 67],
            [184, 181, 190, 0, 0],
        ]

        self._assert_scatter(features['scatter_idx'], updates, exp_pred, (batch_size, n_documents))

    def test_independent_single_output(self):
        """Single-output sampler: every draw has exactly one scatter index."""
        batch_size, sample_size = 3, 3
        n_documents = n_samples = 5

        features, targets = self._run_sampler(IndependentSingleOutputSampler, sample_size)

        # One (example, document) destination per draw.
        exp_scatter_idx = [
            [[[0, 0]], [[0, 1]], [[0, 1]], [[0, 1]], [[0, 3]]],
            [[[1, 0]], [[1, 3]], [[1, 4]], [[1, 2]], [[1, 4]]],
            [[[2, 1]], [[2, 1]], [[2, 0]], [[2, 1]], [[2, 1]]],
        ]
        # How often each document index occurs in ``exp_scatter_idx``, per example.
        exp_counts = [
            [1, 3, 0, 1, 0],
            [1, 0, 1, 1, 2],
            [1, 4, 0, 0, 0],
        ]

        self._assert_sampler_output(features, targets, exp_scatter_idx, exp_counts)

        # Stand-in for network output: one distinct integer per draw.
        updates = tf.reshape(tf.range(batch_size * n_samples), (batch_size, n_samples, 1))

        # Updates that land on the same document are summed by the scatter.
        exp_pred = [
            [0, 6, 0, 4, 0],
            [5, 0, 8, 6, 16],
            [12, 48, 0, 0, 0],
        ]

        self._assert_scatter(features['scatter_idx'], updates, exp_pred, (batch_size, n_documents))