Skip to content

instance_sampling

UniformInstanceAttributeSampling

Bases: OutliersGenerator

Randomly generates outliers by sampling each attribute value uniformly at random (and independently of the other attributes) from the values of the existing instances

Source code in badgers/generators/tabular_data/outliers/instance_sampling.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class UniformInstanceAttributeSampling(OutliersGenerator):
    """
    Randomly generates outliers by sampling the attributes of existing instances uniformly at random

    Every attribute of a generated outlier is drawn independently (with replacement) from the values
    observed for that attribute in the input data, so one outlier mixes values from different instances.
    """

    def __init__(self, random_generator=default_rng(seed=0)):
        """
        Initialize the UniformInstanceAttributeSampling with a random number generator.

        :param random_generator: An instance of numpy's random number generator (default is a generator with seed 0).
            NOTE: the default value is evaluated only once, when the method is defined, so all instances
            created without an explicit generator share the very same generator object.
        """
        super().__init__(random_generator)

    @preprocess_inputs
    def generate(self, X, y, n_outliers: int = 10):
        """
        Generate outliers by sampling, for each attribute, values uniformly at random from that attribute in `X`.

        :param X: the input features (pandas DataFrame or numpy array).
            The body relies on `X.iloc`, `X.shape` and `X.columns`; presumably `preprocess_inputs`
            converts other inputs to a DataFrame (to be confirmed against the decorator).
        :param y: the class labels, target values, or None (if not provided). Not used by this generator.
        :param n_outliers: The number of outliers to generate.
        :return: A tuple `(outliers, yt)`: a DataFrame with `n_outliers` rows and the same columns as `X`
                 containing only the generated outliers, and a numpy array holding the label "outliers"
                 for each generated row.
        """

        # Draw the columns one after the other (same sequence of random draws as stacking them would give),
        # but keep every column as its own array: stacking into a single 2D array would coerce columns of
        # different dtypes (e.g. integers and strings) to one common dtype.
        sampled_columns = {
            position: self.random_generator.choice(X.iloc[:, position], size=n_outliers)
            for position in range(X.shape[1])
        }
        outliers = pd.DataFrame(sampled_columns, index=pd.RangeIndex(n_outliers))
        # positional keys are used above so that duplicated column names in X are not merged together
        outliers.columns = X.columns

        # add "outliers" as labels for outliers
        yt = np.array(["outliers"] * len(outliers))

        return outliers, yt

__init__(random_generator=default_rng(seed=0))

Initialize the UniformInstanceAttributeSampling with a random number generator.

Parameters:

Name Type Description Default
random_generator

An instance of numpy's random number generator (default is a new generator with seed 0).

default_rng(seed=0)
Source code in badgers/generators/tabular_data/outliers/instance_sampling.py
14
15
16
17
18
19
20
def __init__(self, random_generator=default_rng(seed=0)):
    """
    Initialize the UniformInstanceAttributeSampling with a random number generator.

    :param random_generator: An instance of numpy's random number generator (default is a generator with seed 0).
        NOTE: the default value is evaluated only once, when the method is defined, so all instances
        created without an explicit generator share the very same generator object.
    """
    # the generator is simply forwarded to the parent class initializer
    super().__init__(random_generator)

generate(X, y, n_outliers=10)

Parameters:

Name Type Description Default
X

the input features (pandas DataFrame or numpy array).

required
y

the class labels, target values, or None (if not provided).

required
n_outliers int

The number of outliers to generate.

10

Returns:

Type Description

A tuple containing a DataFrame with the generated outliers only (n_outliers rows, same columns as X) and an array holding the label "outliers" for each generated row.

Source code in badgers/generators/tabular_data/outliers/instance_sampling.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@preprocess_inputs
def generate(self, X, y, n_outliers: int = 10):
    """
    Generate outliers by sampling, for each attribute, values uniformly at random from that attribute in `X`.

    Every attribute of a generated outlier is drawn independently (with replacement), so one outlier
    mixes values from different instances.

    :param X: the input features (pandas DataFrame or numpy array).
        The body relies on `X.iloc`, `X.shape` and `X.columns`; presumably `preprocess_inputs`
        converts other inputs to a DataFrame (to be confirmed against the decorator).
    :param y: the class labels, target values, or None (if not provided). Not used by this generator.
    :param n_outliers: The number of outliers to generate.
    :return: A tuple `(outliers, yt)`: a DataFrame with `n_outliers` rows and the same columns as `X`
             containing only the generated outliers, and a numpy array holding the label "outliers"
             for each generated row.
    """

    # Draw the columns one after the other (same sequence of random draws as stacking them would give),
    # but keep every column as its own array: stacking into a single 2D array would coerce columns of
    # different dtypes (e.g. integers and strings) to one common dtype.
    sampled_columns = {
        position: self.random_generator.choice(X.iloc[:, position], size=n_outliers)
        for position in range(X.shape[1])
    }
    outliers = pd.DataFrame(sampled_columns, index=pd.RangeIndex(n_outliers))
    # positional keys are used above so that duplicated column names in X are not merged together
    outliers.columns = X.columns

    # add "outliers" as labels for outliers
    yt = np.array(["outliers"] * len(outliers))

    return outliers, yt