You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

460 lines
11 KiB

// NOTE(review): `mat!` is defined outside this chunk. From its use here it
// presumably takes (test name, pattern, haystack, expected span of the first
// match as `Option<(start, end)>`) — confirm against the macro definition.
//
// Baseline: a one-character literal that matches the entire haystack.
mat!(ascii_literal, r"a", "a", Some((0, 1)));
// Some crazy expressions from regular-expressions.info.
//
// Integer in 0..=255 delimited by word boundaries; "255" occupies 5..8.
mat!(
match_ranges,
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 255",
Some((5, 8))
);
// Same pattern, but 256 is out of range and the `\b` assertions stop a
// shorter prefix such as "25" from matching, so no match is expected.
mat!(
match_ranges_not,
r"(?-u)\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 256",
None
);
// Floating point literals: optional sign, optional integer part, at most one
// dot, then at least one digit.
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)));
// Only one dot is allowed, so the expected match stops after "0.1".
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)));
// The pattern is unanchored, so the leading "a" is skipped.
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)));
// Anchored variant: "1.a" has no digit after the dot, so nothing matches.
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None);
// Simple case-insensitive e-mail pattern requiring a 2-4 letter final label.
mat!(
match_email,
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail.com ",
Some((8, 26))
);
// "gmail" alone has no ".<letters>" suffix, so the pattern must not match.
mat!(
match_email_not,
r"(?i-u)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail ",
None
);
// A longer e-mail pattern (lowercase only, no flags); it is expected to find
// the same 8..26 span as `match_email`.
mat!(
match_email_big,
r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
"mine is jam.slam@gmail.com ",
Some((8, 26))
);
// Anchored dates: year 19xx/20xx, month 01-12, day 01-31, separated by one of
// '-', ' ', '/' or '.'.
mat!(
match_date1,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-01-01",
Some((0, 10))
);
// Month "00" is rejected by `(0[1-9]|1[012])`.
mat!(
match_date2,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-00-01",
None
);
// Month "13" is rejected by `(0[1-9]|1[012])`.
mat!(
match_date3,
r"(?-u)^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-13-01",
None
);
// Do some crazy dancing with the start/end assertions.
//
// NOTE(review): `matiter!` is defined outside this chunk. From its use here it
// presumably takes (test name, pattern, haystack, then the `(start, end)` span
// of every successive match in order) — confirm against the macro definition.
//
// On an empty haystack, any sequence of `^` and `$` is expected to produce a
// single empty match at offset 0, regardless of order or repetition.
matiter!(match_start_end_empty, r"^$", "", (0, 0));
matiter!(match_start_end_empty_many_1, r"^$^$^$", "", (0, 0));
matiter!(match_start_end_empty_many_2, r"^^^$$$", "", (0, 0));
matiter!(match_start_end_empty_rev, r"$^", "", (0, 0));
// The starred group may repeat zero times, so an empty match is expected at
// every offset 0 through 5 of the five-byte haystack "a\nb\nc".
matiter!(
match_start_end_empty_rep,
r"(?:^$)*",
"a\nb\nc",
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5)
);
// Same expectation with the assertions in the opposite order.
matiter!(
match_start_end_empty_rep_rev,
r"(?:$^)*",
"a\nb\nc",
(0, 0),
(1, 1),
(2, 2),
(3, 3),
(4, 4),
(5, 5)
);
// Test negated character classes.
//
// Each haystack starts with characters excluded by the class, so the expected
// match is the first character that is NOT excluded. The cases mix literal
// members, a comma, and the POSIX-style `[:space:]` / `[:alpha:]` classes in
// different positions inside the brackets.
mat!(negclass_letters, r"[^ac]", "acx", Some((2, 3)));
mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
mat!(negclass_letter_space, r"[^a[:space:]]", "a x", Some((2, 3)));
mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
mat!(negclass_space, r"[^[:space:]]", " a", Some((1, 2)));
mat!(negclass_space_comma, r"[^,[:space:]]", ", a", Some((2, 3)));
mat!(negclass_comma_space, r"[^[:space:],]", " ,a", Some((2, 3)));
// "A" is alphabetic and therefore excluded; the digit "1" is the first match.
mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
// Test that repeated empty expressions don't loop forever.
//
// Every pattern wraps a sub-expression that can match the empty string
// (`.*` or `.?`) in a second repetition operator, followed by a literal `=`.
// All of them are expected to match "a=" (0..2) in the haystack "a=b".
//
// Lazy outer repetitions (`*?`, `+?`, `{1,}?`, `{1,2}?`).
mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
// Greedy counterparts of the six cases above.
mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
// Test that we handle various flavors of empty expressions.
//
// Unless noted otherwise, the pattern can only (or preferentially) match the
// empty string, so on "abc" an empty match is expected at each of the offsets
// 0, 1, 2 and 3.
matiter!(match_empty1, r"", "", (0, 0));
matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Empty capture groups, alone and under each repetition operator.
matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Alternations with a branch ("z") that never matches in "abc".
matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// When the empty branch is listed first it is preferred even at offset 1,
// where "b" could match.
matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// When "b" is listed first it wins at offset 1, giving (1, 2). Per the
// expected values, no empty match is reported at offset 2, directly after
// that non-empty match.
matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
// The same branch-order cases, with a syntactically empty alternative.
matiter!(match_empty12, r"|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty13, r"b|", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty14, r"|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty15, r"z|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty16, r"|", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty17, r"||", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty18, r"||z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// The same cases again, with an empty non-capturing group `(?:)`.
matiter!(match_empty19, r"(?:)|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty20, r"b|(?:)", "abc", (0, 0), (1, 2), (3, 3));
matiter!(match_empty21, r"(?:|)", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
matiter!(match_empty22, r"(?:|)|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
// Here the empty group is concatenated with "a", so neither branch can match
// the empty string: only "a" (0..1) and "b" (1..2) are expected.
matiter!(match_empty23, r"a(?:)|b", "abc", (0, 1), (1, 2));
// Exercise the DFA on a pathological input.
// (The expectation is that the DFA's cache gets flushed too frequently on this
// input, which should make it quit and fall back to the NFA algorithm.)
#[test]
fn dfa_handles_pathological_case() {
    // Returns `count` characters, each randomly '1' or '0', drawn from a
    // freshly entropy-seeded RNG (a new RNG is created on every call).
    fn ones_and_zeroes(count: usize) -> String {
        use rand::rngs::SmallRng;
        use rand::{Rng, SeedableRng};

        let mut rng = SmallRng::from_entropy();
        (0..count)
            .map(|_| if rng.gen::<bool>() { '1' } else { '0' })
            .collect()
    }

    let re = regex!(r"[01]*1[01]{20}$");

    // 100_000 random bits, then a literal '1', then exactly 20 more random
    // bits: the haystack always ends in `1[01]{20}`, so the pattern must match.
    let mut haystack = ones_and_zeroes(100_000);
    haystack.push('1');
    haystack += &ones_and_zeroes(20);

    assert!(re.is_match(text!(&*haystack)));
}
// Checks that a large, heavily nested, multi-line pattern compiles.
//
// The pattern is handed to `RegexBuilder` with no builder options set, and the
// result of `build()` is unwrapped, so the test fails only if compilation
// returns an error. Nothing is ever matched against the resulting regex.
//
// NOTE(review): the test name suggests this guards against the parser's
// nesting limit rejecting the pattern — confirm against the parser history.
// NOTE(review): the pattern does not enable `x` (verbose) mode, so the line
// breaks inside the raw string are presumably part of the pattern itself.
// Since only compilation is checked, that should not affect the outcome, but
// the literal must be kept byte-for-byte as is.
#[test]
fn nest_limit_makes_it_parse() {
use regex::RegexBuilder;
RegexBuilder::new(
r#"(?-u)
2(?:
[45]\d{3}|
7(?:
1[0-267]|
2[0-289]|
3[0-29]|
4[01]|
5[1-3]|
6[013]|
7[0178]|
91
)|
8(?:
0[125]|
[139][1-6]|
2[0157-9]|
41|
6[1-35]|
7[1-5]|
8[1-8]|
90
)|
9(?:
0[0-2]|
1[0-4]|
2[568]|
3[3-6]|
5[5-7]|
6[0167]|
7[15]|
8[0146-9]
)
)\d{4}|
3(?:
12?[5-7]\d{2}|
0(?:
2(?:
[025-79]\d|
[348]\d{1,2}
)|
3(?:
[2-4]\d|
[56]\d?
)
)|
2(?:
1\d{2}|
2(?:
[12]\d|
[35]\d{1,2}|
4\d?
)
)|
3(?:
1\d{2}|
2(?:
[2356]\d|
4\d{1,2}
)
)|
4(?:
1\d{2}|
2(?:
2\d{1,2}|
[47]|
5\d{2}
)
)|
5(?:
1\d{2}|
29
)|
[67]1\d{2}|
8(?:
1\d{2}|
2(?:
2\d{2}|
3|
4\d
)
)
)\d{3}|
4(?:
0(?:
2(?:
[09]\d|
7
)|
33\d{2}
)|
1\d{3}|
2(?:
1\d{2}|
2(?:
[25]\d?|
[348]\d|
[67]\d{1,2}
)
)|
3(?:
1\d{2}(?:
\d{2}
)?|
2(?:
[045]\d|
[236-9]\d{1,2}
)|
32\d{2}
)|
4(?:
[18]\d{2}|
2(?:
[2-46]\d{2}|
3
)|
5[25]\d{2}
)|
5(?:
1\d{2}|
2(?:
3\d|
5
)
)|
6(?:
[18]\d{2}|
2(?:
3(?:
\d{2}
)?|
[46]\d{1,2}|
5\d{2}|
7\d
)|
5(?:
3\d?|
4\d|
[57]\d{1,2}|
6\d{2}|
8
)
)|
71\d{2}|
8(?:
[18]\d{2}|
23\d{2}|
54\d{2}
)|
9(?:
[18]\d{2}|
2[2-5]\d{2}|
53\d{1,2}
)
)\d{3}|
5(?:
02[03489]\d{2}|
1\d{2}|
2(?:
1\d{2}|
2(?:
2(?:
\d{2}
)?|
[457]\d{2}
)
)|
3(?:
1\d{2}|
2(?:
[37](?:
\d{2}
)?|
[569]\d{2}
)
)|
4(?:
1\d{2}|
2[46]\d{2}
)|
5(?:
1\d{2}|
26\d{1,2}
)|
6(?:
[18]\d{2}|
2|
53\d{2}
)|
7(?:
1|
24
)\d{2}|
8(?:
1|
26
)\d{2}|
91\d{2}
)\d{3}|
6(?:
0(?:
1\d{2}|
2(?:
3\d{2}|
4\d{1,2}
)
)|
2(?:
2[2-5]\d{2}|
5(?:
[3-5]\d{2}|
7
)|
8\d{2}
)|
3(?:
1|
2[3478]
)\d{2}|
4(?:
1|
2[34]
)\d{2}|
5(?:
1|
2[47]
)\d{2}|
6(?:
[18]\d{2}|
6(?:
2(?:
2\d|
[34]\d{2}
)|
5(?:
[24]\d{2}|
3\d|
5\d{1,2}
)
)
)|
72[2-5]\d{2}|
8(?:
1\d{2}|
2[2-5]\d{2}
)|
9(?:
1\d{2}|
2[2-6]\d{2}
)
)\d{3}|
7(?:
(?:
02|
[3-589]1|
6[12]|
72[24]
)\d{2}|
21\d{3}|
32
)\d{3}|
8(?:
(?:
4[12]|
[5-7]2|
1\d?
)|
(?:
0|
3[12]|
[5-7]1|
217
)\d
)\d{4}|
9(?:
[35]1|
(?:
[024]2|
81
)\d|
(?:
1|
[24]1
)\d{2}
)\d{3}
"#,
)
.build()
.unwrap();
}