|
| 1 | +(define (maybe-match->list rx str . o) |
| 2 | + (let ((res (apply regexp-matches rx str o))) |
| 3 | + (and res (regexp-match->list res)))) |
| 4 | + |
| 5 | +(define-syntax test-re |
| 6 | + (syntax-rules () |
| 7 | + ((test-re res rx str start end) |
| 8 | + (test-equal res (maybe-match->list rx str start end))) |
| 9 | + ((test-re res rx str start) |
| 10 | + (test-re res rx str start (string-length str))) |
| 11 | + ((test-re res rx str) |
| 12 | + (test-re res rx str 0)))) |
| 13 | + |
| 14 | +(define (maybe-search->list rx str . o) |
| 15 | + (let ((res (apply regexp-search rx str o))) |
| 16 | + (and res (regexp-match->list res)))) |
| 17 | + |
| 18 | +(define-syntax test-re-search |
| 19 | + (syntax-rules () |
| 20 | + ((test-re-search res rx str start end) |
| 21 | + (test-equal res (maybe-search->list rx str start end))) |
| 22 | + ((test-re-search res rx str start) |
| 23 | + (test-re-search res rx str start (string-length str))) |
| 24 | + ((test-re-search res rx str) |
| 25 | + (test-re-search res rx str 0)))) |
| 26 | + |
| 27 | +(test-begin "regexp") |
| 28 | + |
| 29 | +(test-re '("ababc" "abab") |
| 30 | + '(: ($ (* "ab")) "c") |
| 31 | + "ababc") |
| 32 | + |
| 33 | +(test-re '("ababc" "abab") |
| 34 | + '(: ($ (* "ab")) "c") |
| 35 | + "xababc" |
| 36 | + 1) |
| 37 | + |
| 38 | +(test-re-search '("y") '(: "y") "xy") |
| 39 | + |
| 40 | +(test-re-search '("ababc" "abab") |
| 41 | + '(: ($ (* "ab")) "c") |
| 42 | + "xababc") |
| 43 | + |
| 44 | +(test-re #f |
| 45 | + '(: (* any) ($ "foo" (* any)) ($ "bar" (* any))) |
| 46 | + "fooxbafba") |
| 47 | + |
| 48 | +(test-re '("fooxbarfbar" "fooxbarf" "bar") |
| 49 | + '(: (* any) ($ "foo" (* any)) ($ "bar" (* any))) |
| 50 | + "fooxbarfbar") |
| 51 | + |
| 52 | +(test-re '("abcd" "abcd") |
| 53 | + '($ (* (or "ab" "cd"))) |
| 54 | + "abcd") |
| 55 | + |
| 56 | +(test-equal "ab" |
| 57 | + (regexp-match-submatch |
| 58 | + (regexp-matches '(or (-> foo "ab") (-> foo "cd")) "ab") |
| 59 | + 'foo)) |
| 60 | + |
| 61 | +(test-equal "cd" |
| 62 | + (regexp-match-submatch |
| 63 | + (regexp-matches '(or (-> foo "ab") (-> foo "cd")) "cd") |
| 64 | + 'foo)) |
| 65 | + |
| 66 | +;; non-deterministic case from issue #229 |
| 67 | +(let* ((elapsed '(: (** 1 2 num) ":" num num (? ":" num num))) |
| 68 | + (span (rx ,elapsed "-" ,elapsed))) |
| 69 | + (test-re-search '("1:45:02-2:06:13") span " 1:45:02-2:06:13 ")) |
| 70 | + |
| 71 | +(test-re '("ababc" "abab") |
| 72 | + '(: bos ($ (* "ab")) "c") |
| 73 | + "ababc") |
| 74 | +(test-re '("ababc" "abab") |
| 75 | + '(: ($ (* "ab")) "c" eos) |
| 76 | + "ababc") |
| 77 | +(test-re '("ababc" "abab") |
| 78 | + '(: bos ($ (* "ab")) "c" eos) |
| 79 | + "ababc") |
| 80 | +(test-re #f |
| 81 | + '(: bos ($ (* "ab")) eos "c") |
| 82 | + "ababc") |
| 83 | +(test-re #f |
| 84 | + '(: ($ (* "ab")) bos "c" eos) |
| 85 | + "ababc") |
| 86 | + |
| 87 | +(test-re '("ababc" "abab") |
| 88 | + '(: bol ($ (* "ab")) "c") |
| 89 | + "ababc") |
| 90 | +(test-re '("ababc" "abab") |
| 91 | + '(: ($ (* "ab")) "c" eol) |
| 92 | + "ababc") |
| 93 | +(test-re '("ababc" "abab") |
| 94 | + '(: bol ($ (* "ab")) "c" eol) |
| 95 | + "ababc") |
| 96 | +(test-re #f |
| 97 | + '(: bol ($ (* "ab")) eol "c") |
| 98 | + "ababc") |
| 99 | +(test-re #f |
| 100 | + '(: ($ (* "ab")) bol "c" eol) |
| 101 | + "ababc") |
| 102 | +(test-re '("\nabc\n" "abc") |
| 103 | + '(: (* #\newline) bol ($ (* alpha)) eol (* #\newline)) |
| 104 | + "\nabc\n") |
| 105 | +(test-re #f |
| 106 | + '(: (* #\newline) bol ($ (* alpha)) eol (* #\newline)) |
| 107 | + "\n'abc\n") |
| 108 | +(test-re #f |
| 109 | + '(: (* #\newline) bol ($ (* alpha)) eol (* #\newline)) |
| 110 | + "\nabc.\n") |
| 111 | + |
| 112 | +(test-re '("ababc" "abab") |
| 113 | + '(: bow ($ (* "ab")) "c") |
| 114 | + "ababc") |
| 115 | +(test-re '("ababc" "abab") |
| 116 | + '(: ($ (* "ab")) "c" eow) |
| 117 | + "ababc") |
| 118 | +(test-re '("ababc" "abab") |
| 119 | + '(: bow ($ (* "ab")) "c" eow) |
| 120 | + "ababc") |
| 121 | +(test-re #f |
| 122 | + '(: bow ($ (* "ab")) eow "c") |
| 123 | + "ababc") |
| 124 | +(test-re #f |
| 125 | + '(: ($ (* "ab")) bow "c" eow) |
| 126 | + "ababc") |
| 127 | +(test-re '(" abc " "abc") |
| 128 | + '(: (* space) bow ($ (* alpha)) eow (* space)) |
| 129 | + " abc ") |
| 130 | +(test-re #f |
| 131 | + '(: (* space) bow ($ (* alpha)) eow (* space)) |
| 132 | + " 'abc ") |
| 133 | +(test-re #f |
| 134 | + '(: (* space) bow ($ (* alpha)) eow (* space)) |
| 135 | + " abc. ") |
| 136 | +(test-re '("abc " "abc") |
| 137 | + '(: ($ (* alpha)) (* any)) |
| 138 | + "abc ") |
| 139 | +(test-re '("abc " "") |
| 140 | + '(: ($ (*? alpha)) (* any)) |
| 141 | + "abc ") |
| 142 | +(test-re '("<em>Hello World</em>" "em>Hello World</em") |
| 143 | + '(: "<" ($ (* any)) ">" (* any)) |
| 144 | + "<em>Hello World</em>") |
| 145 | +(test-re '("<em>Hello World</em>" "em") |
| 146 | + '(: "<" ($ (*? any)) ">" (* any)) |
| 147 | + "<em>Hello World</em>") |
| 148 | +(test-re-search '("foo") '(: "foo") " foo ") |
| 149 | +(test-re-search #f '(: nwb "foo" nwb) " foo ") |
| 150 | +(test-re-search '("foo") '(: nwb "foo" nwb) "xfoox") |
| 151 | + |
| 152 | +(test-re '("beef") |
| 153 | + '(* (/"af")) |
| 154 | + "beef") |
| 155 | + |
| 156 | +(test-re '("12345beef" "beef") |
| 157 | + '(: (* numeric) ($ (* (/"af")))) |
| 158 | + "12345beef") |
| 159 | + |
| 160 | +(let ((number '($ (+ numeric)))) |
| 161 | + (test-equal '("555" "867" "5309") |
| 162 | + (cdr |
| 163 | + (regexp-match->list |
| 164 | + (regexp-search `(: ,number "-" ,number "-" ,number) |
| 165 | + "555-867-5309")))) |
| 166 | + (test-equal '("555" "5309") |
| 167 | + (cdr |
| 168 | + (regexp-match->list |
| 169 | + (regexp-search `(: ,number "-" (w/nocapture ,number) "-" ,number) |
| 170 | + "555-867-5309"))))) |
| 171 | + |
| 172 | +(test-re '("12345BeeF" "BeeF") |
| 173 | + '(: (* numeric) (w/nocase ($ (* (/"af"))))) |
| 174 | + "12345BeeF") |
| 175 | + |
| 176 | +(test-re #f '(* lower) "abcD") |
| 177 | +(test-re '("abcD") '(w/nocase (* lower)) "abcD") |
| 178 | +(cond-expand |
| 179 | + (full-unicode |
| 180 | + (test-re '("σζ") '(* lower) "σζ") |
| 181 | + (test-re '("Σ") '(* upper) "Σ") |
| 182 | + (test-re '("\x01C5;") '(* title) "\x01C5;") |
| 183 | + (test-re '("σζ\x01C5;") '(w/nocase (* lower)) "σζ\x01C5;") |
| 184 | + |
| 185 | + (test-re '("кириллица") '(* alpha) "кириллица") |
| 186 | + (test-re #f '(w/ascii (* alpha)) "кириллица") |
| 187 | + (test-re '("кириллица") '(w/nocase "КИРИЛЛИЦА") "кириллица") |
| 188 | + |
| 189 | + (test-re '("12345") '(* numeric) "12345") |
| 190 | + (test-re #f '(w/ascii (* numeric)) "12345") |
| 191 | + |
| 192 | + (test-re '("한") 'grapheme "한") |
| 193 | + (test-re '("글") 'grapheme "글") |
| 194 | + |
| 195 | + (test-re '("한") '(: bog grapheme eog) "한") |
| 196 | + (test-re #f '(: "ᄒ" bog grapheme eog "ᆫ") "한") |
| 197 | + |
| 198 | + (test-equal '("a" "b" "c") (regexp-extract 'grapheme "abc")) |
| 199 | + (test-equal '("a" " " "b" " " "c") (regexp-extract 'grapheme "a b c")) |
| 200 | + (test-equal '("a" "\n" "b" "\r\n" "c") (regexp-extract 'grapheme "a\nb\r\nc")) |
| 201 | + (test-equal '("a\x0300;" "b\x0301;\x0302;" "c\x0303;\x0304;\x0305;") |
| 202 | + (regexp-extract 'grapheme "a\x0300;b\x0301;\x0302;c\x0303;\x0304;\x0305;")) |
| 203 | + (test-equal '("한" "글") (regexp-extract 'grapheme "한글"))) |
| 204 | + (else)) |
| 205 | + |
| 206 | +(test-equal '("123" "456" "789") (regexp-extract '(+ numeric) "abc123def456ghi789")) |
| 207 | +(test-equal '("123" "456" "789") (regexp-extract '(* numeric) "abc123def456ghi789")) |
| 208 | +(test-equal '("abc" "def" "ghi" "") (regexp-split '(+ numeric) "abc123def456ghi789")) |
| 209 | +(test-equal '("abc" "def" "ghi" "") |
| 210 | + (regexp-split '(* numeric) "abc123def456ghi789")) |
| 211 | +(test-equal '("a" "b") (regexp-split '(+ whitespace) "a b")) |
| 212 | +(test-equal '("a" "" "b") |
| 213 | + (regexp-split '(",;") "a,,b")) |
| 214 | +(test-equal '("a" "" "b" "") |
| 215 | + (regexp-split '(",;") "a,,b,")) |
| 216 | +(test-equal '("") |
| 217 | + (regexp-partition '(* numeric) "")) |
| 218 | +(test-equal '("abc" "123" "def" "456" "ghi") |
| 219 | + (regexp-partition '(* numeric) "abc123def456ghi")) |
| 220 | +(test-equal '("abc" "123" "def" "456" "ghi" "789") |
| 221 | + (regexp-partition '(* numeric) "abc123def456ghi789")) |
| 222 | + |
| 223 | +(cond-expand |
| 224 | + (full-unicode |
| 225 | + (test-equal '("한" "글") |
| 226 | + (regexp-extract |
| 227 | + 'grapheme |
| 228 | + (utf8->string '#u8(#xe1 #x84 #x92 #xe1 #x85 #xa1 #xe1 #x86 #xab |
| 229 | + #xe1 #x84 #x80 #xe1 #x85 #xb3 #xe1 #x86 #xaf))))) |
| 230 | + (else)) |
| 231 | + |
| 232 | +(test-equal "abc def" (regexp-replace '(+ space) "abc \t\n def" " ")) |
| 233 | +(test-equal " abc-abc" |
| 234 | + (regexp-replace '(: ($ (+ alpha)) ":" (* space)) " abc: " '(1 "-" 1))) |
| 235 | +(test-equal " abc- abc" |
| 236 | + (regexp-replace '(: ($ (+ alpha)) ":" (* space)) " abc: " '(1 "-" pre 1))) |
| 237 | + |
| 238 | +(test-equal "-abc \t\n d ef " |
| 239 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0)) |
| 240 | +(test-equal "-abc \t\n d ef " |
| 241 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0 #f 0)) |
| 242 | +(test-equal " abc-d ef " |
| 243 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0 #f 1)) |
| 244 | +(test-equal " abc \t\n d-ef " |
| 245 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0 #f 2)) |
| 246 | +(test-equal " abc \t\n d ef-" |
| 247 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0 #f 3)) |
| 248 | +(test-equal " abc \t\n d ef " |
| 249 | + (regexp-replace '(+ space) " abc \t\n d ef " "-" 0 #f 4)) |
| 250 | +(test-equal " abc d ef " (regexp-replace-all '(+ space) " abc \t\n d ef " " ")) |
| 251 | + |
| 252 | +(test-equal "bc pre: <<<bc >>> match1: <<<def>>> post: <<<gh>>>gh" |
| 253 | + (regexp-replace |
| 254 | + '(: ($ (+ alpha)) ":" (* space)) |
| 255 | + "abc def: ghi" |
| 256 | + '("pre: <<<" pre ">>> match1: <<<" 1 ">>> post: <<<" post ">>>") |
| 257 | + 1 11)) |
| 258 | +(test-end) |
0 commit comments