32
32
(modify-category-entry key ?P ))))
33
33
unicode-category-table)
34
34
35
- (dolist (key (string-to-list " !#$%&*+./<=>?@^|~\\ -" ))
35
+ (dolist (key (string-to-list " !#$%&*+./<=>?@^|~\\ -: " ))
36
36
(modify-category-entry key ?P )))
37
37
38
38
(defconst haskell-lexeme-modid
@@ -43,28 +43,31 @@ Note that GHC accepts Unicode category UppercaseLetter as a first
43
43
character. Following letters are from Unicode categories
44
44
UppercaseLetter, LowercaseLetter, OtherLetter, TitlecaseLetter,
45
45
ModifierLetter, DecimalNumber, OtherNumber, backslash or
46
- underscore.
47
-
48
- Note that this differs from constructor identifier as the latter
49
- one can have any number of hash character at the end to
50
- accommodate MagicHash extension." )
46
+ underscore." )
51
47
52
48
(defconst haskell-lexeme-id
53
- " [[:alpha:]_][[:alnum:]'_]*#* "
49
+ " [[:alpha:]_][[:alnum:]'_]*"
54
50
" Regexp matching a valid Haskell identifier.
55
51
56
52
GHC accepts a string starting with any alphabetic character or
57
53
underscore followed by any alphanumeric character or underscore
58
54
or apostrophe." )
59
55
60
56
(defconst haskell-lexeme-sym
61
- " \\ (:? \\ cP \\ |: \\ ) +"
57
+ " \\ cP +"
62
58
" Regexp matching a valid Haskell variable or constructor symbol.
63
59
64
60
GHC accepts a string of chars from the set
65
61
[:!#$%&*+./<=>?@^|~\\ -] or Unicode category Symbol for chars with
66
62
codes larger than 128 only." )
67
63
64
+ (defconst haskell-lexeme-idsym-first-char
65
+ " \\ (?:[[:alpha:]_]\\ |\\ cP\\ )"
66
+ " Regexp matching first character of a qualified or unqualified
67
+ identifier or symbol.
68
+
69
+ Useful for `re-search-forward' ." )
70
+
68
71
(defconst haskell-lexeme-modid-opt-prefix
69
72
(concat " \\ (?:" haskell-lexeme-modid " \\ .\\ )*" )
70
73
" Regexp matching a valid Haskell module prefix, potentially empty.
@@ -80,6 +83,53 @@ dot. For path component syntax see `haskell-lexeme-modid'.")
80
83
81
84
Note that (match-string 1) returns the unqualified part." )
82
85
86
+ (defun haskell-lexeme-looking-at-qidsym ()
87
+ " Non-nil when point is just in front of an optionally qualified
88
+ identifier or symbol.
89
+
90
+ Using this function is more efficient than matching against the
91
+ regexp `haskell-lexeme-qid-or-qsym' .
92
+
93
+ Returns:
94
+ 'qid - if matched a qualified id: 'Data.Map' or 'Map'
95
+ 'qsym - if matched a qualified id: 'Monad.>>=' or '>>='
96
+ 'qprefix - if matched only modid prefix: 'Data.'
97
+
98
+ After successful 'qid or 'qsym match (match-string 1) will return
99
+ the unqualified part (if any)."
100
+ (let ((begin (point ))
101
+ (match-data-old (match-data )))
102
+ (save-excursion
103
+ (while (looking-at (concat haskell-lexeme-modid " \\ ." ))
104
+ (goto-char (match-end 0 )))
105
+ (cond
106
+ ((looking-at haskell-lexeme-id)
107
+ (let ((beg (match-beginning 0 ))
108
+ (end (match-end 0 )))
109
+
110
+ ; ; check is MagicHash is present at the end of the token
111
+ (goto-char end)
112
+ (when (looking-at " #+" )
113
+ (setq end (match-end 0 )))
114
+
115
+ (set-match-data
116
+ (list begin end
117
+ beg end)))
118
+ 'qid )
119
+ ((looking-at haskell-lexeme-sym)
120
+ (set-match-data
121
+ (list begin (match-end 0 )
122
+ (match-beginning 0 ) (match-end 0 )))
123
+ 'qsym )
124
+ ((equal begin (point ))
125
+ (set-match-data match-data-old)
126
+ nil )
127
+ (t
128
+ (set-match-data
129
+ (list begin (point )
130
+ nil nil ))
131
+ 'qprefix )))))
132
+
83
133
(defconst haskell-lexeme-qid
84
134
(rx-to-string `(: (regexp " '*" )
85
135
(regexp , haskell-lexeme-modid-opt-prefix )
@@ -148,7 +198,7 @@ strictly only escape sequences defined in Haskell Report.")
148
198
(group (* (| (regexp " \\\\ [ \t\n\r\v\f ]*\\\\ " )
149
199
(regexp " \\\\ [ \t\n\r\v\f ]+" )
150
200
(regexp " \\\\ [^ \t\n\r\v\f ]" )
151
- (regexp " [^\"\n \\ ]" ))))
201
+ (* ( regexp " [^\"\n \\ ]" ) ))))
152
202
(group (| " \" " (regexp " $" ) (regexp " \\\\ ?\\ '" )
153
203
))))
154
204
" Regexp matching a string literal lookalike.
@@ -166,30 +216,111 @@ Regexp has subgroup expressions:
166
216
(match-text 3) matches the closing quote, or a closing
167
217
newline or empty string at the end of the buffer." )
168
218
169
- (defconst haskell-lexeme-quasi-quote-literal
170
- (rx-to-string `(: " [" (optional " $" )
171
- (group (regexp , haskell-lexeme-id ))
172
- (group " |" )
173
- (group (* (| (not (any " |" ))
174
- (: " |" (not (any " ]" ))))
175
- ))
176
- (group (| " |" eos))
177
- (| " ]" eos)))
178
- " Regexp matching a quasi quote literal.
219
+ (defun haskell-lexeme-looking-at-string-literal ()
220
+ " Non-nil when point is at a string literal lookalike.
221
+
222
+ Note that this function matches more than Haskell Report
223
+ specifies because we want to support also code under edit.
224
+
225
+ String literals end with double quote or unescaped newline or end
226
+ of buffer.
227
+
228
+ After successful match:
229
+ (match-text 1) matches the opening doublequote.
230
+ (match-text 2) matches the inside of the string.
231
+ (match-text 3) matches the closing quote, or a closing
232
+ newline or is nil when at the end of the buffer."
233
+ (when (looking-at " \" " )
234
+ (save-excursion
235
+ (let ((begin (point )))
236
+ (goto-char (match-end 0 ))
237
+ (let (finish)
238
+ (while (and (not finish)
239
+ (re-search-forward " [\"\n \\ ]" nil 'goto-eob ))
240
+ (cond
241
+ ((equal (match-string 0 ) " \\ " )
242
+ (if (looking-at " [ \t\n\r\v\f ]+\\\\ ?" )
243
+ (goto-char (match-end 0 ))
244
+ (goto-char (1+ (point )))))
245
+
246
+ ((equal (match-string 0 ) " \" " )
247
+ (set-match-data
248
+ (list begin (match-end 0 )
249
+ begin (1+ begin)
250
+ (1+ begin) (match-beginning 0 )
251
+ (match-beginning 0 ) (match-end 0 )))
252
+ (setq finish t ))
253
+
254
+ ((equal (match-string 0 ) " \n " )
255
+ (set-match-data
256
+ (list begin (match-beginning 0 )
257
+ begin (1+ begin)
258
+ (1+ begin) (match-beginning 0 )
259
+ nil nil ))
260
+ (setq finish t ))))
261
+ (unless finish
262
+ ; ; string closed by end of buffer
263
+ (set-match-data
264
+ (list begin (point )
265
+ begin (1+ begin)
266
+ (1+ begin) (point )
267
+ nil nil ))))))
268
+ ; ; there was a match
269
+ t ))
270
+
271
+ (defun haskell-lexeme-looking-at-quasi-quote-literal ()
272
+ " Non-nil when point is just in front of Template Haskell
273
+ quaisquote literal.
179
274
180
275
Quasi quotes start with '[xxx|' or '[$xxx|' sequence and end with
181
- '|]'. The 'xxx' is a quoter name. There is no escaping mechanism
276
+ '|]'. The 'xxx' is a quoter name. There is no escaping mechanism
182
277
provided for the ending sequence.
183
278
184
279
Regexp has subgroup expressions:
185
280
(match-text 1) matches the quoter name (without $ sign if present).
186
281
(match-text 2) matches the opening vertical bar.
187
282
(match-text 3) matches the inside of the quoted string.
188
283
(match-text 4) matches the closing vertical bar
189
- or empty string if at the end of the buffer.
190
-
191
- Note that this regexp admits 'e', 't', 'd', 'p' as quoter names
192
- although template Haskell explicitly rejects those." )
284
+ or nil if at the end of the buffer.
285
+
286
+ Note that this function excludes 'e', 't', 'd', 'p' as quoter
287
+ names according to Template Haskell specification."
288
+ (let ((match-data-old (match-data )))
289
+ (if (and
290
+ (looking-at (rx-to-string `(: " [" (optional " $" )
291
+ (group (regexp , haskell-lexeme-id ))
292
+ (group " |" ))))
293
+ (equal (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1 )))
294
+ 'varid )
295
+ (not (member (match-string 1 ) '(" e" " t" " d" " p" ))))
296
+ (save-excursion
297
+ ; ; note that quasi quote syntax does not have any escaping
298
+ ; ; mechanism and if not closed it will span til lthe end of buffer
299
+ (goto-char (match-end 0 ))
300
+ (let ((match-data (match-data ))
301
+ (match-data-2 (and (re-search-forward " |]" nil t )
302
+ (match-data ))))
303
+ (if match-data-2
304
+ (set-match-data
305
+ (list
306
+ (nth 0 match-data) (nth 1 match-data-2) ; ; whole match
307
+ (nth 2 match-data) (nth 3 match-data) ; ; quoter name
308
+ (nth 4 match-data) (nth 5 match-data) ; ; opening bar
309
+ (nth 5 match-data) (nth 0 match-data-2) ; ; inner string
310
+ (nth 0 match-data-2) (1+ (nth 0 match-data-2)))) ; ; closing bar
311
+
312
+ (set-match-data
313
+ (list
314
+ (nth 0 match-data) (point-max ) ; ; whole match
315
+ (nth 2 match-data) (nth 3 match-data) ; ; quoter name
316
+ (nth 4 match-data) (nth 5 match-data) ; ; opening bar
317
+ (nth 5 match-data) (point-max ) ; ; inner string
318
+ nil nil )) ; ; closing bar
319
+ ))
320
+ t )
321
+ ; ; restore old match data if not matched
322
+ (set-match-data match-data-old)
323
+ nil )))
193
324
194
325
(defun haskell-lexeme-classify-by-first-char (char )
195
326
" Classify token by CHAR.
@@ -258,7 +389,8 @@ See `haskell-lexeme-classify-by-first-char' for details."
258
389
(point (point-marker )))
259
390
(or
260
391
(and
261
- (equal (string-to-syntax " <" ) (syntax-after (point )))
392
+ (equal (string-to-syntax " <" )
393
+ (get-char-property (point ) 'syntax-table ))
262
394
(progn
263
395
(set-match-data (list point (set-marker (make-marker ) (line-end-position ))))
264
396
'literate-comment ))
@@ -271,21 +403,13 @@ See `haskell-lexeme-classify-by-first-char' for details."
271
403
'nested-comment ))
272
404
(and (looking-at haskell-lexeme-char-literal)
273
405
'char )
274
- (and (looking-at haskell-lexeme-string-literal)
406
+ (and (haskell-lexeme-looking-at -string-literal)
275
407
'string )
276
408
(and (looking-at " [][(){}`,;]" )
277
- (let ((match-data (match-data )))
278
- (if (and (equal " [" (match-string-no-properties 0 ))
279
- (looking-at haskell-lexeme-quasi-quote-literal))
280
- (if (or (member (match-string-no-properties 1 ) '(" e" " d" " p" " t" ))
281
- (not (equal (haskell-lexeme-classify-by-first-char (char-after (match-beginning 1 )))
282
- 'varid )))
283
- (progn
284
- (set-match-data match-data)
285
- 'special )
286
- 'template-haskell-quasi-quote )
287
- 'special )))
288
- (and (looking-at haskell-lexeme-qid-or-qsym)
409
+ (if (haskell-lexeme-looking-at-quasi-quote-literal)
410
+ 'template-haskell-quasi-quote
411
+ 'special ))
412
+ (and (haskell-lexeme-looking-at-qidsym)
289
413
(if (save-match-data
290
414
(string-match " \\ `---*\\' " (match-string-no-properties 0 )))
291
415
(progn
0 commit comments