Adding typos to text¶
In [1]:
Copied!
from badgers.generators.text.typos import SwapLettersGenerator, LeetSpeakGenerator, SwapCaseGenerator
from badgers.generators.text.typos import SwapLettersGenerator, LeetSpeakGenerator, SwapCaseGenerator
Setup random generator¶
In [2]:
Copied!
from numpy.random import default_rng
# Fixed seed so the generator created here produces the same random stream on every fresh run.
seed = 0
rng = default_rng(seed)
from numpy.random import default_rng
# Fixed seed so the generator created here produces the same random stream on every fresh run.
seed = 0
rng = default_rng(seed)
Data¶
In [3]:
Copied!
# Sample input: a pangram split on single spaces into a list of words.
X = "the quick brown fox jumps over the lazy dog".split(' ')
# Sample input: a pangram split on single spaces into a list of words.
X = "the quick brown fox jumps over the lazy dog".split(' ')
Swapping letters randomly¶
In [4]:
Copied!
# Letter-swapping generator driven by the seeded `rng` created in the setup cell.
swap_letters = SwapLettersGenerator(random_generator=rng)
# Letter-swapping generator driven by the seeded `rng` created in the setup cell.
swap_letters = SwapLettersGenerator(random_generator=rng)
In [5]:
Copied!
# Pass a copy so `X` stays intact for the comparison printed afterwards; the second return value is discarded.
# NOTE(review): swap_proba=1 presumably means every eligible word gets a swap -- confirm against the badgers docs.
Xt, _ = swap_letters.generate(X.copy(), y=None, swap_proba=1)
# Pass a copy so `X` stays intact for the comparison printed afterwards; the second return value is discarded.
# NOTE(review): swap_proba=1 presumably means every eligible word gets a swap -- confirm against the badgers docs.
Xt, _ = swap_letters.generate(X.copy(), y=None, swap_proba=1)
In [6]:
Copied!
# Print the untouched words and the transformed words, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
# Print the untouched words and the transformed words, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
Original: the quick brown fox jumps over the lazy dog Transformed: the qucik borwn fox jmups oevr the lzay dog
Leet Speak¶
In [7]:
Copied!
# NOTE(review): unlike `swap_letters`, no random_generator is passed here, so the seeded `rng`
# is not handed to this instance -- confirm this is intended.
leet_speak = LeetSpeakGenerator()
# NOTE(review): unlike `swap_letters`, no random_generator is passed here, so the seeded `rng`
# is not handed to this instance -- confirm this is intended.
leet_speak = LeetSpeakGenerator()
In [8]:
Copied!
# Transform a copy of `X`, rebinding `Xt`; the second return value is discarded.
# NOTE(review): replacement_proba=0.25 is presumably the chance of replacing a character -- verify the exact unit in the badgers docs.
Xt, _ = leet_speak.generate(X.copy(), y=None, replacement_proba=0.25)
# Transform a copy of `X`, rebinding `Xt`; the second return value is discarded.
# NOTE(review): replacement_proba=0.25 is presumably the chance of replacing a character -- verify the exact unit in the badgers docs.
Xt, _ = leet_speak.generate(X.copy(), y=None, replacement_proba=0.25)
In [9]:
Copied!
# Print the untouched words and the leet-speak result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
# Print the untouched words and the leet-speak result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
Original: the quick brown fox jumps over the lazy dog Transformed: th3 quick br0w^ /=ox ju/\/\ps over the l4zy dog
Swap case¶
In [10]:
Copied!
# NOTE(review): unlike `swap_letters`, no random_generator is passed here, so the seeded `rng`
# is not handed to this instance -- confirm this is intended.
swap_case = SwapCaseGenerator()
# NOTE(review): unlike `swap_letters`, no random_generator is passed here, so the seeded `rng`
# is not handed to this instance -- confirm this is intended.
swap_case = SwapCaseGenerator()
In [11]:
Copied!
# Transform a copy of `X`, rebinding `Xt`; the second return value is discarded.
# NOTE(review): swapcase_proba=0.25 is presumably the chance of flipping a character's case -- verify in the badgers docs.
Xt, _ = swap_case.generate(X.copy(), y=None, swapcase_proba=0.25)
# Transform a copy of `X`, rebinding `Xt`; the second return value is discarded.
# NOTE(review): swapcase_proba=0.25 is presumably the chance of flipping a character's case -- verify in the badgers docs.
Xt, _ = swap_case.generate(X.copy(), y=None, swapcase_proba=0.25)
In [12]:
Copied!
# Print the untouched words and the case-swapped result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
# Print the untouched words and the case-swapped result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
Original: the quick brown fox jumps over the lazy dog Transformed: thE Quick broWn FoX jumpS Over the lazy Dog
Using pipelines¶
In [13]:
Copied!
from badgers.core.pipeline import Pipeline
from badgers.core.pipeline import Pipeline
In [14]:
Copied!
# Reuse the three generator instances built earlier, registered under string names.
generators = {'swap_letters': swap_letters, 'leet_speak': leet_speak, 'swap_case': swap_case}
# Keyword arguments for each generator, keyed by the same names as `generators`.
# Note swap_proba is 0.5 here, not the 1 used in the standalone example.
params = {'swap_letters': {'swap_proba':0.5}, 'leet_speak': {'replacement_proba':0.25}, 'swap_case': {'swapcase_proba':0.25}}
# NOTE(review): the generators are presumably applied in dict insertion order -- confirm against Pipeline's docs.
pipeline = Pipeline(generators=generators)
# Run the pipeline on a copy of `X`; the second return value is discarded.
Xt, _ = pipeline.generate(X.copy(), y=None, params=params)
# Reuse the three generator instances built earlier, registered under string names.
generators = {'swap_letters': swap_letters, 'leet_speak': leet_speak, 'swap_case': swap_case}
# Keyword arguments for each generator, keyed by the same names as `generators`.
# Note swap_proba is 0.5 here, not the 1 used in the standalone example.
params = {'swap_letters': {'swap_proba':0.5}, 'leet_speak': {'replacement_proba':0.25}, 'swap_case': {'swapcase_proba':0.25}}
# NOTE(review): the generators are presumably applied in dict insertion order -- confirm against Pipeline's docs.
pipeline = Pipeline(generators=generators)
# Run the pipeline on a copy of `X`; the second return value is discarded.
Xt, _ = pipeline.generate(X.copy(), y=None, params=params)
In [15]:
Copied!
# Print the untouched words and the pipeline result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
# Print the untouched words and the pipeline result, each joined with spaces after a tab-terminated label.
print('Original:\t'+' '.join(X))
print('Transformed:\t'+' '.join(Xt))
Original: the quick brown fox jumps over the lazy dog Transformed: the quick /3®Ow^ vo)( Jv(v)|^eHS ov€R t|~|e lzay dog