Function-Based Regex Builder Module Without a Clever Name
A tool to help you interactively develop regexes.
Use `compose()` to join multiple patterns into one.
from rx import compose, group, match, maybe, then
prefix = compose(
match('http'), group(match('s')), maybe(), then('://')
)
print(prefix) # 'http(s)?\:\/\/'
Use `rx()` to create a compiled regex object from a pattern.
import re
from rx import compose, group, match, maybe, rx, then
prefix = compose(
match('http'), group(match('s')), maybe(), then('://')
)
compiled_pattern = rx(prefix)
compiled_pattern == re.compile(r'http(s)?\:\/\/', re.UNICODE) # True
In the US, one way a phone number can be expressed is as a sequence of:
- area code: three digits, possibly wrapped with parentheses
- separator: space(s), dash, dot, or non-existent
- prefix: three digits
- separator: space(s), dash, dot, or non-existent
- suffix: four digits
import re
from rx import (compose, dot, digit, exactly_n_times,
group, match, maybe, OR, rx, spaces, then)
area_code = compose(
digit(), exactly_n_times(3),
OR(),
match('('), digit(), exactly_n_times(3), then(')')
)
print(area_code) # '\\d{3}|\\(\\d{3}\\)'
separator = compose(
spaces(), OR(), dot(), OR(), match('-')
)
separator_maybe = compose(group(separator), maybe())
print(separator_maybe) # '(\\s+|\\.|\\-)?'
prefix = compose(digit(), exactly_n_times(3))
print(prefix) # '\\d{3}'
suffix = compose(digit(), exactly_n_times(4))
print(suffix) # '\\d{4}'
phone_number_pattern = compose(
area_code,
separator_maybe,
prefix,
separator_maybe,
suffix
)
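rx(phone_number_pattern) == re.compile('\\d{3}|\\(\\d{3}\\)(\\s+|\\.|\\-)?\\d{3}(\\s+|\\.|\\-)?\\d{4}') # True
Note: `|` binds more loosely than concatenation, so the pattern above matches either a bare three-digit area code on its own or a parenthesized area code followed by the rest of the number. To require the rest of the number in both cases, wrap the area-code alternation in a group, e.g. `area_code = non_capturing_group(digit(), exactly_n_times(3), OR(), match('('), digit(), exactly_n_times(3), then(')'))`, which gives '(?:\\d{3}|\\(\\d{3}\\))'.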
| Pattern Function | Example Input | Example Output |
| --- | --- | --- |
| `match(v)` | `match('Donuts+')` | `Donuts\\+` |
| `then(v)` (alias of `match(v)`) | `then('Donuts+')` | `Donuts\\+` |
| Pattern Function | Example Input | Example Output |
| --- | --- | --- |
| `any_of(v)` | `any_of('abcd')` | `[abcd]` |
| `anything_but(v)` | `anything_but('a backpack')` | `[^a\\ backpack]*` |
| `char_range(from, to)` | `char_range('A','Z')` | `A-Z` |
| `something_but(v)` | `something_but('nothing')` | `[^nothing]+` |
| Pattern Function | Example Input | Example Output |
| --- | --- | --- |
| `group(*patterns)` | `group(match('p'), OR(), match('q'))` | `(p\|q)` |
| `named_group(name, *patterns)` | `named_group('secure', match('https://'))` | `(?P<secure>https\\:\\/\\/)` |
| `non_capturing_group(*patterns)` | `non_capturing_group(match('http'))` | `(?:http)` |
| Pattern Function | Example Input | Example Output |
| --- | --- | --- |
| `followed_by(v)` | `followed_by('...')` | `(?=\\.\\.\\.)` |
| `not_followed_by(v)` | `not_followed_by('...')` | `(?!\\.\\.\\.)` |
| Pattern Function | Example Input | Example Output |
| --- | --- | --- |
| `preceded_by(v)` | `preceded_by('...')` | `(?<=\\.\\.\\.)` |
| `not_preceded_by(v)` | `not_preceded_by('...')` | `(?<!\\.\\.\\.)` |
| Pattern Function | Output |
| --- | --- |
| `between_n_and_m_times(n,m)` | `{n,m}` |
| `between_n_and_m_times_lazy(n,m)` | `{n,m}?` |
| `maybe()` | `?` |
| `one_or_more_times()` | `+` |
| `OR()` | `\|` |
| `zero_or_more_times()` | `*` |
| Pattern Function | Output |
| --- | --- |
| `digit()` | `\d` |
| `end_of_line()` | `$` |
| `space()` | `\s` |
| `spaces()` | `\s+` |
| `start_of_line()` | `^` |
| `tab()` | `\t` |
| `word()` | `\w+` |
| `word_boundary()` | `\b` |
| Pattern Function | Output |
| --- | --- |
| `anything()` | `.*` |
| `dot()` | `\\.` |
| `linebreak()` | `(?:(?:\n)\|(?:\r\n))` |
| `something()` | `.+` |