"""
Forward-pass smoke tests for the groupwise models in ``deletor.models.gsf``.
"""
# Python Modules
import argparse
import os
import tempfile
import unittest
# 3rd Party Modules
import numpy as np
import tensorflow as tf
# Project Modules
import examples.build_tfrecords as build
import examples.pipeline as pipeline
import deletor.models.gsf as gsf
import deletor.tfutils as tfutils
from deletor.constants import MIN_FLOAT_32
# Project helper; presumably hides GPUs from TensorFlow so the tests run on
# CPU -- confirm against deletor.tfutils.
tfutils.disable_gpu()
# Run tf.functions eagerly so intermediate values can be inspected/printed.
# ``experimental_run_functions_eagerly`` is deprecated; prefer the stable name
# and only fall back to the experimental alias on older TensorFlow releases.
if hasattr(tf.config, 'run_functions_eagerly'):
    tf.config.run_functions_eagerly(True)
else:
    tf.config.experimental_run_functions_eagerly(True)
# Compact, wide array printing for the score dumps emitted by the tests.
np.set_printoptions(precision=2, suppress=True, edgeitems=20, linewidth=10000)
class MyTestCase(unittest.TestCase):
    """Forward-pass smoke tests for the groupwise models in ``gsf``.

    ``setUpClass`` converts ``test_data.svm`` (located next to this file) into
    a temporary tfrecords file once per class and keeps the cached dataset on
    ``cls.dataset``. The tests only verify that a forward pass runs without
    raising; the resulting scores are printed, not asserted.
    """

    # Sampling / model configuration shared by the tests.
    seed = 0
    multiple = 2
    group_size = 4

    # Per-component shapes handed to ``Dataset.padded_batch`` for the
    # ``(x, y)`` elements of the dataset.
    padded_shapes = (
        {
            'context_one_hot': dict(),
            'context_multi_hot': dict(),
            'context_dense': dict(),
            'sequence_one_hot': dict(),
            'sequence_multi_hot': dict(),
            'sequence_dense': (None, None),
        },
        (None,),
    )

    # Fill values matching ``padded_shapes``: 0. for every ``x`` entry and
    # ``MIN_FLOAT_32`` for ``y``.
    padding_values = (
        {
            'context_one_hot': 0.,
            'context_multi_hot': 0.,
            'context_dense': 0.,
            'sequence_one_hot': 0.,
            'sequence_multi_hot': 0.,
            'sequence_dense': 0.,
        },
        MIN_FLOAT_32,
    )

    @classmethod
    def setUpClass(cls) -> None:
        """Build the tfrecords fixture once and cache the loaded dataset."""
        # The raw data is stored next to this test module.
        directory = os.path.dirname(os.path.realpath(__file__))
        svmpath = os.path.join(directory, 'test_data.svm')

        # Write a tfrecords file based on the raw data.
        with tempfile.NamedTemporaryFile() as tfrfile:
            # ``build.write_data`` takes an argparse-style namespace.
            args = argparse.Namespace(
                input_file=svmpath,
                output_file=tfrfile.name,
                compression_type=None,
                compression_level=None,
            )
            build.write_data(args)
            dataset = pipeline.load_dataset(tfrfile.name, n_features=6).cache()
            # Iterate once while the temporary file still exists so the cache
            # is filled before the file is deleted on leaving the ``with``.
            for _ in dataset:
                pass

        cls.dataset = dataset

    def _model_params(self):
        """Return the model parameters shared by both forward-pass tests."""
        return {
            gsf.ModelParameter.RANDOM_SEED: self.seed,
            gsf.ModelParameter.GROUP_SIZE: self.group_size,
            gsf.ModelParameter.N_UNITS: [8, 4],
        }

    def _padded_batches(self):
        """Return the cached dataset as padded batches of two elements."""
        return self.dataset.padded_batch(
            2, self.padded_shapes, self.padding_values)

    def test_gsf_foward_pass(self):
        """Run ``GroupwiseScoringNetwork`` on every padded batch."""
        model = gsf.GroupwiseScoringNetwork(self._model_params())
        # Padding must be done before sampling.
        for x, y in self._padded_batches():
            x, _ = pipeline.sample_documents(
                x, y, self.group_size, self.multiple,
                method='flat_indices', seed=self.seed)
            scores = model(x, training=False)
            print(f"scores:\n{scores}")

    def test_gin_forward_pass(self):
        """Run ``GroupwiseInputNetwork`` on the first padded batch."""
        model = gsf.GroupwiseInputNetwork(self._model_params())
        # Padding must be done before sampling.
        first_batch = self._padded_batches().take(1)
        x, y = tf.data.experimental.get_single_element(first_batch)
        # This test uses a multiple of 1 and seed=-1 with windowed sampling,
        # rather than the class-level ``multiple`` / ``seed``.
        x, _ = pipeline.sample_documents(
            x, y, self.group_size, 1, method='windowed', seed=-1)
        scores = model(x, training=False)
        print(f"gin scores:\n{scores}")
if __name__ == '__main__':
    # Allow running this test module directly as a script.
    unittest.main()