Skip to content

noise

GaussianNoiseClassesGenerator

Bases: NoiseGenerator

A generator that adds Gaussian white noise to each class separately.

Source code in badgers/generators/tabular_data/noise.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class GaussianNoiseClassesGenerator(NoiseGenerator):
    """
    A generator that adds Gaussian white noise to each class separately.
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        :param random_generator: A random generator, forwarded unchanged to the parent initializer.
            NOTE: the default `default_rng(seed=0)` is evaluated once, when the method is defined,
            so all instances created without an explicit generator share the same generator object.
        """
        super().__init__(random_generator=random_generator)

    @preprocess_inputs
    def generate(self, X, y, noise_std_per_class=dict()):
        """
        Add Gaussian white noise to the data, using a different noise level for each class.
        The data is first standardized (each column has a mean = 0 and variance = 1).
        For each class listed in `noise_std_per_class`, the noise is generated from a normal
        distribution with the standard deviation given for that class and added to the rows
        belonging to that class.
        Finally, the standardization is reverted.

        The rows of the returned data are in the same order as in `X` and keep the index of `X`.
        Rows whose class is not listed in `noise_std_per_class` are returned without added noise.

        :param X: the input
        :param y: the target
        :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class
            key = class labels, values = noise std for this given class
            (the default dictionary is never modified by this method)
        :return: Xt, yt
        """
        # standardize X
        scaler = StandardScaler()
        Xt = scaler.fit_transform(X)

        # add the noise class by class, directly at the positions of the rows of that class:
        # this keeps every row aligned with its original index and label
        n_features = Xt.shape[1]
        for label, noise_std in noise_std_per_class.items():
            # plain boolean array so that the selection is positional, whatever the index of y is
            mask = np.asarray(y == label)
            Xt[mask] += self.random_generator.normal(
                loc=0, scale=noise_std, size=(int(mask.sum()), n_features)
            )

        # inverse standardization
        Xt = scaler.inverse_transform(Xt)
        # the labels are unchanged; they are returned as a Series sharing the index of X
        yt = pd.Series(np.asarray(y), index=X.index, name=getattr(y, "name", None))
        return pd.DataFrame(data=Xt, columns=X.columns, index=X.index), yt

__init__(random_generator=default_rng(seed=0))

:param random_generator: A random generator

Source code in badgers/generators/tabular_data/noise.py
70
71
72
73
74
75
def __init__(self, random_generator=default_rng(seed=0)):
    """
    Initialize the generator by delegating to the parent class initializer.

    :param random_generator: A random generator, forwarded unchanged to the parent `__init__`.
        NOTE: the default `default_rng(seed=0)` is evaluated once, when the method is defined,
        so every call relying on the default receives the same generator object.
    """
    super().__init__(random_generator=random_generator)

generate(X, y, noise_std_per_class=dict())

Add Gaussian white noise to the data, class by class. The data is first standardized (each column has a mean = 0 and variance = 1). For each class, the noise is generated from a normal distribution whose standard deviation is the value given for that class in noise_std_per_class. The noise is added to the data.

:param X: the input :param y: the target :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class (key = class label, value = noise std for that class) :return: Xt, yt

Source code in badgers/generators/tabular_data/noise.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
@preprocess_inputs
def generate(self, X, y, noise_std_per_class=dict()):
    """
    Add Gaussian white noise to the data, using a different noise level for each class.
    The data is first standardized (each column has a mean = 0 and variance = 1).
    For each class listed in `noise_std_per_class`, the noise is generated from a normal
    distribution with the standard deviation given for that class and added to the rows
    belonging to that class.
    Finally, the standardization is reverted.

    The rows of the returned data are in the same order as in `X` and keep the index of `X`.
    Rows whose class is not listed in `noise_std_per_class` are returned without added noise.

    :param X: the input
    :param y: the target
    :param noise_std_per_class: A dictionary giving the standard deviation of the noise to be added for each class
        key = class labels, values = noise std for this given class
        (the default dictionary is never modified by this method)
    :return: Xt, yt
    """
    # standardize X
    scaler = StandardScaler()
    Xt = scaler.fit_transform(X)

    # add the noise class by class, directly at the positions of the rows of that class:
    # this keeps every row aligned with its original index and label
    n_features = Xt.shape[1]
    for label, noise_std in noise_std_per_class.items():
        # plain boolean array so that the selection is positional, whatever the index of y is
        mask = np.asarray(y == label)
        Xt[mask] += self.random_generator.normal(
            loc=0, scale=noise_std, size=(int(mask.sum()), n_features)
        )

    # inverse standardization
    Xt = scaler.inverse_transform(Xt)
    # the labels are unchanged; they are returned as a Series sharing the index of X
    yt = pd.Series(np.asarray(y), index=X.index, name=getattr(y, "name", None))
    return pd.DataFrame(data=Xt, columns=X.columns, index=X.index), yt

GaussianNoiseGenerator

Bases: NoiseGenerator

A generator that adds Gaussian white noise to the tabular data

Source code in badgers/generators/tabular_data/noise.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class GaussianNoiseGenerator(NoiseGenerator):
    """
    Noise generator that perturbs all values of a tabular dataset with Gaussian white noise
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        :param random_generator: the random generator used to draw the noise,
            passed on unchanged to the parent initializer
        """
        super().__init__(random_generator=random_generator)

    @preprocess_inputs
    def generate(self, X, y, noise_std):
        """
        Perturb the data with Gaussian white noise.
        Steps: standardize the data (each column gets mean = 0 and variance = 1),
        draw the noise from a normal distribution centered on 0 with
        standard deviation = `noise_std`, add it to the standardized data,
        and finally revert the standardization.

        :param X: the input
        :param y: the target (returned as is)
        :param noise_std: The standard deviation of the noise to be added
        :return: Xt, yt
        """
        standardizer = StandardScaler()
        standardized = standardizer.fit_transform(X)

        # one independent noise value per cell of the standardized table
        noise = self.random_generator.normal(loc=0, scale=noise_std, size=standardized.shape)
        standardized += noise

        # back to the original scale of the data
        noisy = standardizer.inverse_transform(standardized)
        noisy_frame = pd.DataFrame(data=noisy, columns=X.columns, index=X.index)
        return noisy_frame, y

__init__(random_generator=default_rng(seed=0))

:param random_generator: A random generator

Source code in badgers/generators/tabular_data/noise.py
33
34
35
36
37
38
39
def __init__(self, random_generator=default_rng(seed=0)):
    """
    Initialize the generator by delegating to the parent class initializer.

    :param random_generator: A random generator, forwarded unchanged to the parent `__init__`.
        NOTE: the default `default_rng(seed=0)` is evaluated once, when the method is defined,
        so every call relying on the default receives the same generator object.

    """
    super().__init__(random_generator=random_generator)

generate(X, y, noise_std)

Adds Gaussian white noise to the data. The data is first standardized (each column has a mean = 0 and variance = 1). The noise is generated from a normal distribution with standard deviation = noise_std. The noise is added to the data.

:param X: the input :param y: the target :param noise_std: The standard deviation of the noise to be added :return: Xt, yt

Source code in badgers/generators/tabular_data/noise.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@preprocess_inputs
def generate(self, X, y, noise_std):
    """
    Adds Gaussian white noise to the data.
    The data is first standardized (each column has a mean = 0 and variance = 1).
    The noise is generated from a normal distribution with standard deviation = `noise_std`.
    The noise is added to the data.

    :param X: the input
    :param y: the target
    :param noise_std: The standard deviation of the noise to be added
    :return: Xt, yt
    """
    # standardize X
    scaler = StandardScaler()
    # fit, transform
    Xt = scaler.fit_transform(X)
    # add noise
    Xt += self.random_generator.normal(loc=0, scale=noise_std, size=Xt.shape)
    # inverse standardization
    Xt = scaler.inverse_transform(Xt)
    return pd.DataFrame(data=Xt, columns=X.columns, index=X.index), y

NoiseGenerator

Bases: GeneratorMixin

Base class for generators that add noise to tabular data

Source code in badgers/generators/tabular_data/noise.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class NoiseGenerator(GeneratorMixin):
    """
    Common parent of the generators that add noise to tabular data.
    It only stores the random generator; subclasses provide `generate`.
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        :param random_generator: A random generator, kept in `self.random_generator`
        """
        self.random_generator = random_generator

    @abc.abstractmethod
    def generate(self, X, y, **params):
        """
        Abstract hook to be overridden by the concrete noise generators.

        :param X: the input
        :param y: the target
        :param params: generator specific keyword parameters
        """

__init__(random_generator=default_rng(seed=0))

:param random_generator: A random generator

Source code in badgers/generators/tabular_data/noise.py
17
18
19
20
21
def __init__(self, random_generator=default_rng(seed=0)):
    """
    Store the random generator on the instance.

    :param random_generator: A random generator, kept in `self.random_generator`.
        NOTE: the default `default_rng(seed=0)` is evaluated once, when the method is defined,
        so every call relying on the default receives the same generator object.
    """
    self.random_generator = random_generator