Skip to content

Commit f2d0cfd

Browse files
authored
Handle dash in the regex2glob function (#795)
1 parent e96852f commit f2d0cfd

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

python/mlcroissant/mlcroissant/_src/core/regex.py

+2
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def _regex_to_glob_for_str(regex: str) -> Iterable[str]:
6565
regex = re.sub(r"\.\*", "*", regex)
6666
# Interpret .+ as *
6767
regex = re.sub(r"\.\+", "*", regex)
68+
# Interpret \- (an escaped dash in the regex) as a literal -
69+
regex = re.sub(r"\\-", "-", regex)
6870
return [regex]
6971

7072

python/mlcroissant/mlcroissant/_src/core/regex_test.py

+6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@
2727
"*/train/*.parquet", # ...to a valid glob pattern.
2828
],
2929
],
30+
[
31+
"^.+/my\\-train/.*\.parquet$", # From a valid regex...
32+
[
33+
"*/my-train/*.parquet", # ...to a valid glob pattern.
34+
],
35+
],
3036
],
3137
)
3238
def test_regex_to_glob(regex: str, output: list[str]):

0 commit comments

Comments
 (0)