# Source code for deletor.preprocessing

"""
Documentation
"""
# These are ported from the sklearn preprocessing module.

# Python Modules
import abc

# 3rd Party Modules
import numpy as np
import tensorflow as tf

# Project Modules
from deletor.math.utils import spacing


class DataTransformer(abc.ABC):
    """Abstract interface shared by every transformer in this module.

    A concrete subclass has to implement both :meth:`transform` and
    :meth:`inverse_transform`; the class cannot be instantiated otherwise.
    """

    @abc.abstractmethod
    def transform(self, x: tf.Tensor):
        """Map ``x`` into the transformed space (implemented by subclasses)."""

    @abc.abstractmethod
    def inverse_transform(self, x: tf.Tensor):
        """Map ``x`` back from the transformed space (implemented by subclasses)."""


class MinMaxScaler(DataTransformer):
    """Affine min-max scaling driven by pre-computed parameters.

    The forward pass computes ``x * scale + min_adjust`` and the inverse pass
    computes ``(x - min_adjust) / scale``.

    Args:
        min_adjust: Additive offset applied after scaling.
        scale: Multiplicative factor applied to the input.
        data_min: Stored as a float32 constant; not read by the transforms.
        data_max: Stored as a float32 constant; not read by the transforms.
        data_range: Stored as a float32 constant; not read by the transforms.
        n_samples: Stored as a constant; not read by the transforms.
    """

    def __init__(
            self,
            min_adjust: np.ndarray,
            scale: np.ndarray,
            data_min: np.ndarray,
            data_max: np.ndarray,
            data_range: np.ndarray,
            n_samples: int
    ):
        def _as_float32(values):
            # All array-like parameters are frozen as float32 constants.
            return tf.constant(values, dtype=tf.float32)

        # Parameters used by transform / inverse_transform.
        self.min_adjust = _as_float32(min_adjust)
        self.scale = _as_float32(scale)

        # Bookkeeping values kept alongside the fitted parameters.
        self.data_min = _as_float32(data_min)
        self.data_max = _as_float32(data_max)
        self.data_range = _as_float32(data_range)
        self.n_samples = tf.constant(n_samples)

    @tf.function
    def transform(self, x: tf.Tensor):
        """Return ``x * scale + min_adjust``."""
        return x * self.scale + self.min_adjust

    @tf.function
    def inverse_transform(self, x: tf.Tensor):
        """Return ``(x - min_adjust) / scale``, reversing :meth:`transform`."""
        return (x - self.min_adjust) / self.scale


# noinspection DuplicatedCode
class StandardScaler(DataTransformer):
    """Centering and scaling driven by pre-computed statistics.

    Args:
        scale: Divisor applied in :meth:`transform` when ``with_std`` is set.
        mean: Offset subtracted in :meth:`transform` when ``with_mean`` is set.
        var: Stored as a float32 constant; not read by the transforms.
        n_samples: Stored as a constant; not read by the transforms.
        with_mean: Enables the centering step.
        with_std: Enables the scaling step.
    """

    def __init__(
            self,
            scale: np.ndarray,
            mean: np.ndarray,
            var: np.ndarray,
            n_samples: int,
            with_mean: bool = True,
            with_std: bool = True
    ):
        # Switches are kept as tensors; the transforms branch on them.
        self.with_mean = tf.constant(with_mean)
        self.with_std = tf.constant(with_std)

        # Statistics are frozen as float32 constants.
        self.mean = tf.constant(mean, dtype=tf.float32)
        self.scale = tf.constant(scale, dtype=tf.float32)
        self.var = tf.constant(var, dtype=tf.float32)
        self.n_samples = tf.constant(n_samples)

    @tf.function
    def transform(self, x: tf.Tensor):
        """Subtract ``mean`` and then divide by ``scale``, each if enabled."""
        if self.with_mean:
            x = x - self.mean

        if self.with_std:
            x = x / self.scale

        return x

    @tf.function
    def inverse_transform(self, x: tf.Tensor):
        """Multiply by ``scale`` and then add ``mean``, each if enabled.

        The steps run in the opposite order to :meth:`transform`.
        """
        if self.with_std:
            x = x * self.scale

        if self.with_mean:
            x = x + self.mean

        return x


# noinspection DuplicatedCode
class RobustScaler(DataTransformer):
    """Centering and scaling driven by a pre-computed center and scale.

    Args:
        center: Offset subtracted in :meth:`transform` when
            ``with_centering`` is set.
        scale: Divisor applied in :meth:`transform` when ``with_scaling``
            is set.
        with_centering: Enables the centering step.
        with_scaling: Enables the scaling step.
    """

    def __init__(
            self,
            center: np.ndarray,
            scale: np.ndarray,
            with_centering: bool = True,
            with_scaling: bool = True
    ):
        # Switches are kept as tensors; the transforms branch on them.
        self.with_centering = tf.constant(with_centering)
        self.with_scaling = tf.constant(with_scaling)

        # Parameters are frozen as float32 constants.
        self.center = tf.constant(center, dtype=tf.float32)
        self.scale = tf.constant(scale, dtype=tf.float32)

    @tf.function
    def transform(self, x: tf.Tensor):
        """Subtract ``center`` and then divide by ``scale``, each if enabled."""
        if self.with_centering:
            x = x - self.center

        if self.with_scaling:
            x = x / self.scale

        return x

    @tf.function
    def inverse_transform(self, x: tf.Tensor):
        """Multiply by ``scale`` and then add ``center``, each if enabled.

        The steps run in the opposite order to :meth:`transform`.
        """
        if self.with_scaling:
            x = x * self.scale

        if self.with_centering:
            x = x + self.center

        return x


class PowerTransformer(DataTransformer):
    """Yeo-Johnson power transform using pre-computed per-column lambdas.

    Args:
        lambdas: One lambda per column of the input; stored as a float32
            constant.
        scaler: Optional object exposing ``scale_``, ``mean_``, ``var_``,
            ``n_samples_seen_``, ``with_mean`` and ``with_std``. When given,
            a :class:`StandardScaler` is built from those attributes and is
            applied after the forward transform and undone before the
            inverse transform.

    NOTE(review): both transforms assume a 2-D ``x`` whose second dimension
    matches ``len(lambdas)`` (the input is transposed and mapped row by row
    against ``lambdas``) -- confirm against callers.
    """

    def __init__(self, lambdas: np.ndarray, scaler=None):
        self.lambdas = tf.constant(lambdas, dtype=tf.float32)
        if scaler:
            self.scaler = StandardScaler(
                scaler.scale_,
                scaler.mean_,
                scaler.var_,
                scaler.n_samples_seen_,
                scaler.with_mean,
                scaler.with_std
            )
        else:
            self.scaler = None

    # noinspection DuplicatedCode
    @tf.function
    def transform(self, x: tf.Tensor):
        """Apply the Yeo-Johnson transform column by column, then the scaler.

        Args:
            x: Input tensor (float32 is assumed by the ``tf.map_fn`` output
                dtype).

        Returns:
            The transformed tensor, with the same layout as ``x``.
        """
        def yeo_johnson(inputs):
            # inputs[1] holds the column's lambda repeated; take one copy.
            col, lmbda = inputs[0], inputs[1][0]

            # Same float32 tolerance for every lambda test, here and in
            # inverse_transform, so both directions pick matching branches.
            eps = spacing(tf.constant(1., dtype=tf.float32))

            mask = tf.cast(tf.math.greater_equal(col, 0), tf.float32)
            inv_mask = tf.cast(tf.math.less(col, 0), tf.float32)

            # When x >= 0
            if tf.abs(lmbda) < eps:
                out_1 = mask * tf.math.log1p(col)
            else:
                out_1 = mask * tf.math.divide_no_nan(tf.math.pow(col + 1, lmbda) - 1, lmbda)

            # Entries outside this branch's domain can evaluate to NaN/inf,
            # and 0 * NaN is still NaN, so every non-finite value is zeroed.
            out_1 = tf.where(tf.logical_not(tf.math.is_finite(out_1)), tf.zeros_like(out_1), out_1)

            # When x < 0
            if tf.abs(lmbda - 2) > eps:
                out_2 = -tf.math.divide_no_nan(tf.math.pow(-col + 1, 2 - lmbda) - 1, 2 - lmbda)
                out_2 *= inv_mask
            else:
                out_2 = inv_mask * tf.math.log1p(-col)

            out_2 = tf.where(tf.logical_not(tf.math.is_finite(out_2)), tf.zeros_like(out_2), out_2)

            # The masks are disjoint, so the sum selects one branch per entry.
            result = out_1 + out_2

            return result

        # Work on columns: after the transpose each row is one column of x.
        x = tf.transpose(x)
        lambdas = tf.reshape(tf.repeat(self.lambdas, tf.shape(x)[1]), tf.shape(x))
        x = tf.map_fn(yeo_johnson, (x, lambdas), dtype=tf.float32)
        x = tf.transpose(x)

        if self.scaler is not None:
            x = self.scaler.transform(x)

        return x

    # noinspection DuplicatedCode
    @tf.function
    def inverse_transform(self, x: tf.Tensor):
        """Undo the scaler (if any), then invert the Yeo-Johnson transform.

        Args:
            x: Tensor in the transformed space (float32 is assumed by the
                ``tf.map_fn`` output dtype).

        Returns:
            The tensor mapped back column by column, with the same layout
            as ``x``.
        """
        def yeo_johnson(inputs):
            # inputs[1] holds the column's lambda repeated; take one copy.
            col, lmbda = inputs[0], inputs[1][0]

            # Same tolerance as the forward transform.
            eps = spacing(tf.constant(1., dtype=tf.float32))

            mask = tf.cast(tf.math.greater_equal(col, 0), tf.float32)
            inv_mask = tf.cast(tf.math.less(col, 0), tf.float32)

            # When x >= 0
            if tf.abs(lmbda) < eps:
                # The mask must multiply the whole (exp(col) - 1) term;
                # otherwise masked-out (negative) entries contribute -1.
                out_1 = mask * (tf.exp(col) - 1)
            else:
                out_1 = mask * (tf.math.pow(col * lmbda + 1, 1 / lmbda) - 1)

            # Entries outside this branch's domain can evaluate to NaN/inf,
            # and 0 * NaN is still NaN, so every non-finite value is zeroed.
            out_1 = tf.where(tf.logical_not(tf.math.is_finite(out_1)), tf.zeros_like(out_1), out_1)

            # When x < 0
            if tf.abs(lmbda - 2) > eps:
                out_2 = inv_mask * (1. - tf.math.pow(-(2. - lmbda) * col + 1, 1. / (2. - lmbda)))
            else:
                out_2 = inv_mask * (1. - tf.math.exp(-col))

            out_2 = tf.where(tf.logical_not(tf.math.is_finite(out_2)), tf.zeros_like(out_2), out_2)

            # The masks are disjoint, so the sum selects one branch per entry.
            result = out_1 + out_2

            return result

        if self.scaler is not None:
            x = self.scaler.inverse_transform(x)

        # Work on columns: after the transpose each row is one column of x.
        x = tf.transpose(x)
        lambdas = tf.reshape(tf.repeat(self.lambdas, tf.shape(x)[1]), tf.shape(x))
        x = tf.map_fn(yeo_johnson, (x, lambdas), dtype=tf.float32)
        x = tf.transpose(x)

        return x