Skip to content

noise

GaussianNoiseClassesGenerator

Bases: NoiseGenerator

A generator that adds Gaussian white noise to each class separately.

Source code in badgers/generators/tabular_data/noise.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
class GaussianNoiseClassesGenerator(NoiseGenerator):
    """
    A generator that adds Gaussian white noise to each class separately.
    """

    def __init__(self, random_generator=default_rng(seed=0), repeat=1, noise_std_per_class: dict = None):
        """

        :param random_generator: A random generator
            NOTE: the default generator is created once, when this signature is evaluated,
            so every instance built without an explicit generator shares it.
        :param repeat: number of noisy copies generated for each original point
        :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class
            key = class labels, values = noise std for this given class
        """
        super().__init__(random_generator=random_generator, repeat=repeat)
        self.noise_std_per_class = noise_std_per_class

    def generate(self, X, y, **params):
        """
        Add Gaussian white noise to the data, using a different standard deviation for each class.
        The data is first standardized (each column has a mean = 0 and variance = 1).
        For each class listed in `noise_std_per_class`, the noise is generated from a normal distribution
        with the standard deviation configured for that class, and added to the rows of that class.
        The standardization is undone before returning.

        Rows whose label is not a key of `noise_std_per_class` are not part of the output.
        The output is grouped by class (in the iteration order of `noise_std_per_class`),
        not in the original row order.

        :param X: the input
        :param y: the target (class labels, one per row of X)
        :param params: optional parameters
        :return: Xt, yt
        :raises ValueError: if `y` is None or if `noise_std_per_class` was not provided
        """
        if y is None:
            raise ValueError('y (the class labels) is required to add noise per class')
        if self.noise_std_per_class is None:
            raise ValueError('noise_std_per_class must be a dictionary mapping class labels to noise std')

        # `y == label` must be element-wise: on a plain Python list it would be a single False
        labels_in = np.asarray(y)

        # standardize X
        scaler = StandardScaler()
        # fit, transform
        scaler.fit(X)
        Xt = scaler.transform(X)

        # add noise and repeat for each class
        tmp_Xt = []
        tmp_yt = []
        for label, noise_std in self.noise_std_per_class.items():
            data_class = np.asarray(Xt[labels_in == label])
            noisy_data_class = np.concatenate(
                [
                    data_class + self.random_generator.normal(loc=0, scale=noise_std, size=data_class.shape)
                    for _ in range(self.repeat)
                ],
                axis=0
            )
            tmp_Xt.append(noisy_data_class)
            # np.full keeps the label dtype even when the class has no rows
            # (an empty Python list would be read as an empty float array)
            tmp_yt.append(np.full(self.repeat * data_class.shape[0], label))

        Xt = np.concatenate(tmp_Xt, axis=0)
        yt = np.concatenate(tmp_yt, axis=0)
        # undo the standardization
        return scaler.inverse_transform(Xt), yt

__init__(random_generator=default_rng(seed=0), repeat=1, noise_std_per_class=None)

:param random_generator: A random generator :param repeat: number of times a noisy point is generated from the original :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class key = class labels, values = noise std for this given class

Source code in badgers/generators/tabular_data/noise.py
80
81
82
83
84
85
86
87
88
def __init__(self, random_generator=default_rng(seed=0), repeat=1, noise_std_per_class: dict = None):
    """
    Store the per-class noise configuration and hand the shared settings to the parent class.

    :param random_generator: A random generator (forwarded to the parent constructor)
    :param repeat: forwarded to the parent constructor
    :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class
        key = class labels, values = noise std for this given class
    """
    super().__init__(
        repeat=repeat,
        random_generator=random_generator,
    )
    self.noise_std_per_class = noise_std_per_class

generate(X, y, **params)

Add Gaussian white noise to the data. The data is first standardized (each column has a mean = 0 and variance = 1). For each class, the noise is generated from a normal distribution with the standard deviation given for that class in noise_std_per_class. The noise is added to the data.

:param X: the input :param y: the target :param params: optional parameters :return: Xt, yt

Source code in badgers/generators/tabular_data/noise.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def generate(self, X, y, **params):
    """
    Add Gaussian white noise to the data, using a different standard deviation for each class.
    The data is first standardized (each column has a mean = 0 and variance = 1).
    For each class listed in `noise_std_per_class`, the noise is generated from a normal distribution
    with the standard deviation configured for that class, and added to the rows of that class.
    The standardization is undone before returning.

    Rows whose label is not a key of `noise_std_per_class` are not part of the output.
    The output is grouped by class (in the iteration order of `noise_std_per_class`),
    not in the original row order.

    :param X: the input
    :param y: the target (class labels, one per row of X)
    :param params: optional parameters
    :return: Xt, yt
    :raises ValueError: if `y` is None or if `noise_std_per_class` was not provided
    """
    if y is None:
        raise ValueError('y (the class labels) is required to add noise per class')
    if self.noise_std_per_class is None:
        raise ValueError('noise_std_per_class must be a dictionary mapping class labels to noise std')

    # `y == label` must be element-wise: on a plain Python list it would be a single False
    labels_in = np.asarray(y)

    # standardize X
    scaler = StandardScaler()
    # fit, transform
    scaler.fit(X)
    Xt = scaler.transform(X)

    # add noise and repeat for each class
    tmp_Xt = []
    tmp_yt = []
    for label, noise_std in self.noise_std_per_class.items():
        data_class = np.asarray(Xt[labels_in == label])
        noisy_data_class = np.concatenate(
            [
                data_class + self.random_generator.normal(loc=0, scale=noise_std, size=data_class.shape)
                for _ in range(self.repeat)
            ],
            axis=0
        )
        tmp_Xt.append(noisy_data_class)
        # np.full keeps the label dtype even when the class has no rows
        # (an empty Python list would be read as an empty float array)
        tmp_yt.append(np.full(self.repeat * data_class.shape[0], label))

    Xt = np.concatenate(tmp_Xt, axis=0)
    yt = np.concatenate(tmp_yt, axis=0)
    # undo the standardization
    return scaler.inverse_transform(Xt), yt

GaussianNoiseGenerator

Bases: NoiseGenerator

A generator that adds Gaussian white noise to the tabular data

Source code in badgers/generators/tabular_data/noise.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class GaussianNoiseGenerator(NoiseGenerator):
    """
    Noise generator that perturbs tabular data with Gaussian white noise.
    """

    def __init__(self, random_generator=default_rng(seed=0), noise_std: float = 0.1, repeat=1):
        """
        Store the noise level and hand the shared settings to the parent class.

        :param random_generator: A random generator
        :param noise_std: The standard deviation of the noise to be added
        :param repeat: number of noisy copies generated for each original point
        """
        super().__init__(
            repeat=repeat,
            random_generator=random_generator,
        )
        self.noise_std = noise_std

    def generate(self, X, y, **params):
        """
        Return noisy copies of X together with the matching targets.

        X is standardized first (each column has a mean = 0 and variance = 1),
        noise drawn from a normal distribution with standard deviation = `noise_std`
        is added `repeat` times, and the standardization is undone on the stacked result.

        :param X: the input
        :param y: the target; when it is not None it is repeated `repeat` times
        :param params: optional parameters
        :return: Xt, yt (yt is None when y is None)
        """
        # fit the scaler on X and standardize it
        scaler = StandardScaler().fit(X)
        standardized = scaler.transform(X)

        # one independently drawn noisy copy per repetition
        noisy_copies = []
        for _ in range(self.repeat):
            noise = self.random_generator.normal(loc=0, scale=self.noise_std, size=standardized.shape)
            noisy_copies.append(standardized + noise)
        Xt = np.concatenate(noisy_copies, axis=0)

        yt = None if y is None else np.concatenate([y] * self.repeat, axis=0)

        # undo the standardization
        return scaler.inverse_transform(Xt), yt

__init__(random_generator=default_rng(seed=0), noise_std=0.1, repeat=1)

:param random_generator: A random generator :param noise_std: The standard deviation of the noise to be added :param repeat: number of times a noisy point is generated from the original

Source code in badgers/generators/tabular_data/noise.py
34
35
36
37
38
39
40
41
def __init__(self, random_generator=default_rng(seed=0), noise_std: float = 0.1, repeat=1):
    """
    Store the noise level and hand the shared settings to the parent class.

    :param random_generator: A random generator (forwarded to the parent constructor)
    :param noise_std: The standard deviation of the noise to be added
    :param repeat: forwarded to the parent constructor
    """
    super().__init__(
        repeat=repeat,
        random_generator=random_generator,
    )
    self.noise_std = noise_std

generate(X, y, **params)

Adds Gaussian white noise to the data. The data is first standardized (each column has a mean = 0 and variance = 1). The noise is generated from a normal distribution with standard deviation = noise_std. The noise is added to the data.

:param X: the input :param y: the target :param params: optional parameters :return: Xt, yt

Source code in badgers/generators/tabular_data/noise.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def generate(self, X, y, **params):
    """
    Return noisy copies of X together with the matching targets.

    X is standardized first (each column has a mean = 0 and variance = 1),
    noise drawn from a normal distribution with standard deviation = `noise_std`
    is added `repeat` times, and the standardization is undone on the stacked result.

    :param X: the input
    :param y: the target; when it is not None it is repeated `repeat` times
    :param params: optional parameters
    :return: Xt, yt (yt is None when y is None)
    """
    # fit the scaler on X and standardize it
    scaler = StandardScaler().fit(X)
    standardized = scaler.transform(X)

    # one independently drawn noisy copy per repetition
    noisy_copies = []
    for _ in range(self.repeat):
        noise = self.random_generator.normal(loc=0, scale=self.noise_std, size=standardized.shape)
        noisy_copies.append(standardized + noise)
    Xt = np.concatenate(noisy_copies, axis=0)

    yt = None if y is None else np.concatenate([y] * self.repeat, axis=0)

    # undo the standardization
    return scaler.inverse_transform(Xt), yt

NoiseGenerator

Bases: GeneratorMixin

Base class for generators that add noise to tabular data

Source code in badgers/generators/tabular_data/noise.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class NoiseGenerator(GeneratorMixin):
    """
    Common base for the generators that add noise to tabular data
    """

    def __init__(self, random_generator=default_rng(seed=0), repeat=1):
        """
        Keep the settings shared by all noise generators.

        :param random_generator: A random generator
        :param repeat: number of times a noisy point is generated from the original.
            repeat = 1 means that Xt.shape[0] == X.shape[0], repeat = 10 means that Xt.shape[0] == 10 * X.shape[0]
        """
        self.repeat = repeat
        self.random_generator = random_generator

    @abc.abstractmethod
    def generate(self, X, y=None, **params):
        """
        Declared abstract with an empty body here; the concrete noise generators supply the implementation.

        :param X: the input
        :param y: the target
        :param params: optional parameters
        """

__init__(random_generator=default_rng(seed=0), repeat=1)

:param random_generator: A random generator :param repeat: number of times a noisy point is generated from the original. repeat = 1 means that Xt.shape[0] == X.shape[0], repeat = 10 means that Xt.shape[0] == 10 * X.shape[0]

Source code in badgers/generators/tabular_data/noise.py
15
16
17
18
19
20
21
22
def __init__(self, random_generator=default_rng(seed=0), repeat=1):
    """
    Keep the settings shared by all noise generators.

    :param random_generator: A random generator
    :param repeat: number of times a noisy point is generated from the original.
        repeat = 1 means that Xt.shape[0] == X.shape[0], repeat = 10 means that Xt.shape[0] == 10 * X.shape[0]
    """
    self.repeat = repeat
    self.random_generator = random_generator