@@ -25,8 +25,6 @@ module Kore.Parser.Lexer
25
25
, parseId
26
26
, parseAnyId , parseSetId , isSymbolId
27
27
, isElementVariableId , isSetVariableId
28
- , elementVariableIdParser
29
- , setVariableIdParser
30
28
, parseSortId
31
29
, parseSymbolId
32
30
, parseModuleName
@@ -48,6 +46,9 @@ import Data.Map.Strict
48
46
( Map
49
47
)
50
48
import qualified Data.Map.Strict as Map
49
+ import Data.Text
50
+ ( Text
51
+ )
51
52
import qualified Data.Text as Text
52
53
import Text.Megaparsec
53
54
( SourcePos (.. )
@@ -84,6 +85,7 @@ space = L.space Parser.space1 lineComment blockComment
84
85
where
85
86
lineComment = L. skipLineComment " //"
86
87
blockComment = L. skipBlockComment " /*" " */"
88
+ {-# INLINE space #-}
87
89
88
90
{- | Parse the character, but skip its result.
89
91
-}
@@ -97,7 +99,7 @@ skipChar = Monad.void . Parser.char
97
99
See also: 'L.symbol', 'space'
98
100
99
101
-}
100
- symbol :: String -> Parser ()
102
+ symbol :: Text -> Parser ()
101
103
symbol = Monad. void . L. symbol space
102
104
103
105
colon :: Parser ()
@@ -163,7 +165,7 @@ consumes any trailing whitespace.
163
165
See also: 'space'
164
166
165
167
-}
166
- keyword :: String -> Parser ()
168
+ keyword :: Text -> Parser ()
167
169
keyword s = lexeme $ do
168
170
_ <- Parser. chunk s
169
171
-- Check that the next character cannot be part of an @id@, i.e. check that
@@ -183,19 +185,16 @@ sourcePosToFileLocation
183
185
, column = unPos column'
184
186
}
185
187
186
- {- Takes a parser for the string of the identifier
187
- and returns an 'Id' annotated with position.
188
- -}
189
- stringParserToIdParser :: Parser String -> Parser Id
190
- stringParserToIdParser stringRawParser = do
188
+ {- | Annotate a 'Text' parser with an 'AstLocation'.
189
+ -}
190
+ parseIntoId :: Parser Text -> Parser Id
191
+ parseIntoId stringRawParser = do
191
192
! pos <- sourcePosToFileLocation <$> getSourcePos
192
- name <- lexeme stringRawParser
193
- return Id
194
- { getId = Text. pack name
195
- , idLocation = AstLocationFile pos
196
- }
193
+ getId <- lexeme stringRawParser
194
+ return Id { getId, idLocation = AstLocationFile pos }
195
+ {-# INLINE parseIntoId #-}
197
196
198
- koreKeywordsSet :: HashSet String
197
+ koreKeywordsSet :: HashSet Text
199
198
koreKeywordsSet = HashSet. fromList keywords
200
199
where
201
200
keywords =
@@ -224,17 +223,18 @@ genericIdRawParser
224
223
:: (Char -> Bool ) -- ^ contains the characters allowed for @⟨prefix-char⟩@.
225
224
-> (Char -> Bool ) -- ^ contains the characters allowed for @⟨body-char⟩@.
226
225
-> IdKeywordParsing
227
- -> Parser String
226
+ -> Parser Text
228
227
genericIdRawParser isFirstChar isBodyChar idKeywordParsing = do
229
- c <- Parser. satisfy isFirstChar <?> " first identifier character"
230
- cs <- Parser. takeWhileP (Just " identifier character" ) isBodyChar
231
- let genericId = c : cs
232
- keywordsForbidden = idKeywordParsing == KeywordsForbidden
228
+ (genericId, _) <- Parser. match $ do
229
+ _ <- Parser. satisfy isFirstChar <?> " first identifier character"
230
+ _ <- Parser. takeWhileP (Just " identifier character" ) isBodyChar
231
+ pure ()
232
+ let keywordsForbidden = idKeywordParsing == KeywordsForbidden
233
233
isKeyword = HashSet. member genericId koreKeywordsSet
234
234
when (keywordsForbidden && isKeyword)
235
235
$ fail
236
236
( " Identifiers should not be keywords: '"
237
- ++ genericId
237
+ ++ Text. unpack genericId
238
238
++ " '."
239
239
)
240
240
return genericId
@@ -293,11 +293,14 @@ isIdChar c = isIdFirstChar c || isIdOtherChar c
293
293
An identifier cannot be a keyword.
294
294
-}
295
295
parseId :: Parser Id
296
- parseId = stringParserToIdParser (parseIdRaw KeywordsForbidden )
296
+ parseId = parseIntoId parseIdText
297
297
298
- parseIdRaw :: IdKeywordParsing -> Parser String
298
+ parseIdRaw :: IdKeywordParsing -> Parser Text
299
299
parseIdRaw = genericIdRawParser isIdFirstChar isIdChar
300
300
301
+ parseIdText :: Parser Text
302
+ parseIdText = parseIdRaw KeywordsForbidden
303
+
301
304
{- | Parse a module name.
302
305
303
306
@
@@ -309,7 +312,7 @@ parseModuleName = lexeme moduleNameRawParser
309
312
310
313
moduleNameRawParser :: Parser ModuleName
311
314
moduleNameRawParser =
312
- ModuleName . Text. pack <$> parseIdRaw KeywordsForbidden
315
+ ModuleName <$> parseIdRaw KeywordsForbidden
313
316
314
317
{- | Parses a 'Sort' 'Id'
315
318
@@ -321,7 +324,9 @@ parseSortId :: Parser Id
321
324
parseSortId = parseId <?> " sort identifier"
322
325
323
326
parseAnyId :: Parser Id
324
- parseAnyId = (parseSpecialId <|> parseSetId <|> parseId) <?> " identifier"
327
+ parseAnyId = parseIntoId
328
+ (parseSpecialIdText <|> parseSetIdText <|> parseIdText)
329
+ <?> " identifier"
325
330
326
331
isSymbolId :: Id -> Bool
327
332
isSymbolId Id { getId } =
@@ -336,19 +341,16 @@ isElementVariableId Id { getId } =
336
341
isSetVariableId :: Id -> Bool
337
342
isSetVariableId Id { getId } = Text. head getId == ' @'
338
343
339
- parseSpecialId :: Parser Id
340
- parseSpecialId =
341
- stringParserToIdParser parseSpecialIdString
342
- where
343
- parseSpecialIdString =
344
- (:) <$> Parser. char ' \\ ' <*> parseIdRaw KeywordsPermitted
344
+ parseSpecialIdText :: Parser Text
345
+ parseSpecialIdText = fst <$> Parser. match
346
+ (Parser. char ' \\ ' >> parseIdRaw KeywordsPermitted )
347
+
348
+ parseSetIdText :: Parser Text
349
+ parseSetIdText = fst <$> Parser. match
350
+ (Parser. char ' @' >> parseIdRaw KeywordsPermitted )
345
351
346
352
parseSetId :: Parser Id
347
- parseSetId =
348
- stringParserToIdParser parseSetIdString
349
- where
350
- parseSetIdString =
351
- (:) <$> Parser. char ' @' <*> parseIdRaw KeywordsPermitted
353
+ parseSetId = parseIntoId parseSetIdText
352
354
353
355
{- | Parses a 'Symbol' 'Id'
354
356
@@ -357,41 +359,12 @@ parseSetId =
357
359
@
358
360
-}
359
361
parseSymbolId :: Parser Id
360
- parseSymbolId =
361
- stringParserToIdParser symbolIdRawParser <?> " symbol or alias identifier"
362
-
363
- symbolIdRawParser :: Parser String
364
- symbolIdRawParser = do
365
- c <- peekChar'
366
- if c == ' \\ '
367
- then do
368
- skipChar ' \\ '
369
- (c : ) <$> parseIdRaw KeywordsPermitted
370
- else parseIdRaw KeywordsForbidden
371
-
372
- {-| Parses a @set-variable-id@, which always starts with @\@@.
373
-
374
- @
375
- <set-variable-id> ::= ['@'] <id>
376
- @
377
- -}
378
- setVariableIdParser :: Parser Id
379
- setVariableIdParser = stringParserToIdParser setVariableIdRawParser
362
+ parseSymbolId = parseIntoId symbolIdRawParser <?> " symbol or alias identifier"
380
363
381
- setVariableIdRawParser :: Parser String
382
- setVariableIdRawParser = do
383
- start <- Parser. char ' @'
384
- end <- parseIdRaw KeywordsPermitted
385
- return (start: end)
386
-
387
- {-| Parses an @element-variable-id@
388
-
389
- @
390
- <element-variable-id> ::= <id>
391
- @
392
- -}
393
- elementVariableIdParser :: Parser Id
394
- elementVariableIdParser = parseId
364
+ symbolIdRawParser :: Parser Text
365
+ symbolIdRawParser = fmap fst $ Parser. match $
366
+ (Parser. char ' \\ ' >> parseIdRaw KeywordsPermitted )
367
+ <|> parseIdRaw KeywordsForbidden
395
368
396
369
{- | Parses a C-style string literal, unescaping it.
397
370
0 commit comments