Skip to content

drift

DriftGenerator

Bases: GeneratorMixin

Base class for generators that apply drift to tabular data

Source code in badgers/generators/tabular_data/drift.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
class DriftGenerator(GeneratorMixin):
    """
    Base class for generators that apply drift to tabular data
    """

    def __init__(self, random_generator=None):
        """
        :param random_generator: numpy.random.Generator, default None
            A random generator. When None, a fresh default_rng(seed=0) is created
            for this instance.
        """
        # A default evaluated in the signature is built once and then shared (and
        # advanced) by every instance; building it here keeps instances independent.
        if random_generator is None:
            random_generator = default_rng(seed=0)
        self.random_generator = random_generator

    @abc.abstractmethod
    def generate(self, X, y, **params):
        """
        Apply drift to the data. Subclasses are expected to override this method.

        :param X: the input features
        :param y: the labels / targets associated with X
        :param params: optional additional parameters
        :return: nothing in this base implementation
        """
        pass

__init__(random_generator=default_rng(seed=0))

:param random_generator: numpy.random.Generator, default default_rng(seed=0) A random generator

Source code in badgers/generators/tabular_data/drift.py
15
16
17
18
19
20
def __init__(self, random_generator=None):
    """
    :param random_generator: numpy.random.Generator, default None
        A random generator. When None, a fresh default_rng(seed=0) is created
        for this instance.
    """
    # A default evaluated in the signature is built once and then shared (and
    # advanced) by every instance; building it here keeps instances independent.
    if random_generator is None:
        random_generator = default_rng(seed=0)
    self.random_generator = random_generator

RandomShiftClassesGenerator

Bases: DriftGenerator

Randomly shift (geometrical translation) values of each class independently of one another. Data are first standardized (mean = 0, var = 1) and for each class a random number is added to all instances.

Source code in badgers/generators/tabular_data/drift.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class RandomShiftClassesGenerator(DriftGenerator):
    """
    Randomly shift (geometrical translation) values of each class independently of one another.
    Data are first standardized (mean = 0, var = 1) and
    for each class a random number is added to all instances.
    """

    def __init__(self, random_generator=None, shift_std: float = 0.1):
        """

        :param random_generator: A random generator. When None, a fresh default_rng(seed=0)
            is created for this instance.
        :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
        """
        # A default evaluated in the signature is built once and then shared (and
        # advanced) by every instance; building it here keeps instances independent.
        if random_generator is None:
            random_generator = default_rng(seed=0)
        super().__init__(random_generator=random_generator)
        self.shift_std = shift_std

    def generate(self, X, y, **params):
        """
        Randomly shift (geometrical translation) values of each class independently of one another.
        Data are first standardized (mean = 0, var = 1) and
        for each class a random number is added to all instances.

        :param X: the input features (2D, one row per instance)
        :param y: the class label of each row of X
        :param params: not used by this generator
        :return: a tuple (Xt, y) where Xt holds the shifted features mapped back to the
            original scale and y is returned untouched
        """
        # Convert the labels once: with a plain list, `y == c` is a single bool
        # instead of an element-wise mask and no row would be shifted.
        labels = np.asarray(y)
        # extract unique labels
        classes = np.unique(labels)
        # normalize X
        scaler = StandardScaler()
        Xt = scaler.fit_transform(X)
        # generate one random shift per class
        shifts = self.random_generator.normal(loc=0, scale=self.shift_std, size=len(classes))
        # add the class shift to every column of the rows belonging to that class
        for c, s in zip(classes, shifts):
            Xt[labels == c] += s
        # inverse transform
        return scaler.inverse_transform(Xt), y

__init__(random_generator=default_rng(seed=0), shift_std=0.1)

:param random_generator: A random generator :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)

Source code in badgers/generators/tabular_data/drift.py
73
74
75
76
77
78
79
80
def __init__(self, random_generator=None, shift_std: float = 0.1):
    """

    :param random_generator: A random generator. When None, a fresh default_rng(seed=0)
        is created for this instance.
    :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
    """
    # A default evaluated in the signature is built once and then shared (and
    # advanced) by every instance; building it here keeps instances independent.
    if random_generator is None:
        random_generator = default_rng(seed=0)
    super().__init__(random_generator=random_generator)
    self.shift_std = shift_std

generate(X, y, **params)

Randomly shift (geometrical translation) values of each class independently of one another. Data are first standardized (mean = 0, var = 1) and for each class a random number is added to all instances.

Source code in badgers/generators/tabular_data/drift.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def generate(self, X, y, **params):
    """
    Randomly shift (geometrical translation) values of each class independently of one another.
    Data are first standardized (mean = 0, var = 1) and
    for each class a random number is added to all instances.

    :param X: the input features (2D, one row per instance)
    :param y: the class label of each row of X
    :param params: not used by this generator
    :return: a tuple (Xt, y) where Xt holds the shifted features mapped back to the
        original scale and y is returned untouched
    """
    # Convert the labels once: with a plain list, `y == c` is a single bool
    # instead of an element-wise mask and no row would be shifted.
    labels = np.asarray(y)
    # extract unique labels
    classes = np.unique(labels)
    # normalize X
    scaler = StandardScaler()
    Xt = scaler.fit_transform(X)
    # generate one random shift per class
    shifts = self.random_generator.normal(loc=0, scale=self.shift_std, size=len(classes))
    # add the class shift to every column of the rows belonging to that class
    for c, s in zip(classes, shifts):
        Xt[labels == c] += s
    # inverse transform
    return scaler.inverse_transform(Xt), y

RandomShiftGenerator

Bases: DriftGenerator

Randomly shift (geometrical translation) values of each column independently of one another. Data are first standardized (mean = 0, var = 1) and a random number is added to each column. The i-th column is simply translated: $x_i \leftarrow x_i + \epsilon_i$

Source code in badgers/generators/tabular_data/drift.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class RandomShiftGenerator(DriftGenerator):
    r"""
    Randomly shift (geometrical translation) values of each column independently of one another.
    Data are first standardized (mean = 0, var = 1) and a random number is added to each column.
    The i-th column is simply translated: `$x_i \leftarrow x_i + \epsilon_i$`
    """

    def __init__(self, random_generator=None, shift_std: float = 0.1):
        """

        :param random_generator: A random generator. When None, a fresh default_rng(seed=0)
            is created for this instance.
        :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
        """
        # A default evaluated in the signature is built once and then shared (and
        # advanced) by every instance; building it here keeps instances independent.
        if random_generator is None:
            random_generator = default_rng(seed=0)
        super().__init__(random_generator=random_generator)
        self.shift_std = shift_std

    def generate(self, X, y=None, **params):
        r"""
        Randomly shift (geometrical translation) values of each column independently of one another.
        Data are first standardized (mean = 0, var = 1) and a random number is added to each column.
        The i-th column is simply translated: `$x_i \leftarrow x_i + \epsilon_i$`

        :param X: the input features (2D, one column per feature)
        :param y: not used by this generator, returned untouched
        :param params: not used by this generator
        :return: a tuple (Xt, y) where Xt holds the shifted features mapped back to the
            original scale
        """
        # normalize X
        scaler = StandardScaler()
        Xt = scaler.fit_transform(X)
        # generate one random shift per column; the column count is read from the
        # transformed array so that inputs without a `.shape` (e.g. lists) also work
        shift = self.random_generator.normal(loc=0, scale=self.shift_std, size=Xt.shape[1])
        # add shift (broadcast over the rows)
        Xt += shift
        # inverse transform
        return scaler.inverse_transform(Xt), y

__init__(random_generator=default_rng(seed=0), shift_std=0.1)

:param random_generator: A random generator :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)

Source code in badgers/generators/tabular_data/drift.py
34
35
36
37
38
39
40
41
def __init__(self, random_generator=None, shift_std: float = 0.1):
    """

    :param random_generator: A random generator. When None, a fresh default_rng(seed=0)
        is created for this instance.
    :param shift_std: The standard deviation of the amount of shift applied (shift is chosen from a normal distribution)
    """
    # A default evaluated in the signature is built once and then shared (and
    # advanced) by every instance; building it here keeps instances independent.
    if random_generator is None:
        random_generator = default_rng(seed=0)
    super().__init__(random_generator=random_generator)
    self.shift_std = shift_std

generate(X, y=None, **params)

Randomly shift (geometrical translation) values of each column independently of one another. Data are first standardized (mean = 0, var = 1) and a random number is added to each column. The i-th column is simply translated: $x_i \leftarrow x_i + \epsilon_i$

:param X: :param y: :param params: :return:

Source code in badgers/generators/tabular_data/drift.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def generate(self, X, y=None, **params):
    r"""
    Randomly shift (geometrical translation) values of each column independently of one another.
    Data are first standardized (mean = 0, var = 1) and a random number is added to each column.
    The i-th column is simply translated: `$x_i \leftarrow x_i + \epsilon_i$`

    :param X: the input features (2D, one column per feature)
    :param y: not used by this generator, returned untouched
    :param params: not used by this generator
    :return: a tuple (Xt, y) where Xt holds the shifted features mapped back to the
        original scale
    """
    # normalize X
    scaler = StandardScaler()
    Xt = scaler.fit_transform(X)
    # generate one random shift per column; the column count is read from the
    # transformed array so that inputs without a `.shape` (e.g. lists) also work
    shift = self.random_generator.normal(loc=0, scale=self.shift_std, size=Xt.shape[1])
    # add shift (broadcast over the rows)
    Xt += shift
    # inverse transform
    return scaler.inverse_transform(Xt), y