Skip to content

Commit 748d8bb

Browse files
committed
[mlir] Make parser not rely on terminating null.
This is used in a follow-up change to parse slices of a buffer.
1 parent 35693da commit 748d8bb

File tree

3 files changed

+34
-3
lines changed

3 files changed

+34
-3
lines changed

mlir/lib/AsmParser/DialectSymbolParser.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
8989
nestedPunctuation.pop_back();
9090
return success();
9191
};
92+
const char* curBufferEnd = state.lex.getBufferEnd();
9293
do {
9394
// Handle code completions, which may appear in the middle of the symbol
9495
// body.
@@ -98,6 +99,12 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
9899
break;
99100
}
100101

102+
if (curBufferEnd == curPtr) {
103+
if (!nestedPunctuation.empty())
104+
return emitPunctError();
105+
return emitError("unexpected nul or EOF in pretty dialect name");
106+
}
107+
101108
char c = *curPtr++;
102109
switch (c) {
103110
case '\0':

mlir/lib/AsmParser/Lexer.cpp

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,18 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context,
3737
AsmParserCodeCompleteContext *codeCompleteContext)
3838
: sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
3939
auto bufferID = sourceMgr.getMainFileID();
40+
41+
// Check to see if the main buffer contains the last buffer, and if so the
42+
// last buffer should be used as main file for parsing.
43+
if (sourceMgr.getNumBuffers() > 1) {
44+
unsigned lastFileID = sourceMgr.getNumBuffers();
45+
const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
46+
const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
47+
if (main->getBufferStart() <= last->getBufferStart() &&
48+
main->getBufferEnd() >= last->getBufferEnd()) {
49+
bufferID = lastFileID;
50+
}
51+
}
4052
curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
4153
curPtr = curBuffer.begin();
4254

@@ -71,13 +83,17 @@ Token Lexer::emitError(const char *loc, const Twine &message) {
7183
}
7284

7385
Token Lexer::lexToken() {
86+
const char *curBufferEnd = curBuffer.end();
7487
while (true) {
7588
const char *tokStart = curPtr;
7689

7790
// Check to see if the current token is at the code completion location.
7891
if (tokStart == codeCompleteLoc)
7992
return formToken(Token::code_complete, tokStart);
8093

94+
if (tokStart == curBufferEnd)
95+
return formToken(Token::eof, tokStart);
96+
8197
// Lex the next token.
8298
switch (*curPtr++) {
8399
default:
@@ -102,7 +118,7 @@ Token Lexer::lexToken() {
102118
case 0:
103119
// This may either be a nul character in the source file or may be the EOF
104120
// marker that llvm::MemoryBuffer guarantees will be there.
105-
if (curPtr - 1 == curBuffer.end())
121+
if (curPtr - 1 == curBufferEnd)
106122
return formToken(Token::eof, tokStart);
107123
continue;
108124

@@ -259,15 +275,19 @@ void Lexer::skipComment() {
259275
assert(*curPtr == '/');
260276
++curPtr;
261277

278+
const char *curBufferEnd = curBuffer.end();
262279
while (true) {
280+
if (curPtr == curBufferEnd)
281+
return;
282+
263283
switch (*curPtr++) {
264284
case '\n':
265285
case '\r':
266286
// Newline is end of comment.
267287
return;
268288
case 0:
269289
// If this is the end of the buffer, end the comment.
270-
if (curPtr - 1 == curBuffer.end()) {
290+
if (curPtr - 1 == curBufferEnd) {
271291
--curPtr;
272292
return;
273293
}
@@ -405,6 +425,7 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
405425
Token Lexer::lexString(const char *tokStart) {
406426
assert(curPtr[-1] == '"');
407427

428+
const char *curBufferEnd = curBuffer.end();
408429
while (true) {
409430
// Check to see if there is a code completion location within the string. In
410431
// these cases we generate a completion location and place the currently
@@ -419,7 +440,7 @@ Token Lexer::lexString(const char *tokStart) {
419440
case 0:
420441
// If this is a random nul character in the middle of a string, just
421442
// include it. If it is the end of file, then it is an error.
422-
if (curPtr - 1 != curBuffer.end())
443+
if (curPtr - 1 != curBufferEnd)
423444
continue;
424445
[[fallthrough]];
425446
case '\n':

mlir/lib/AsmParser/Lexer.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ class Lexer {
4040
/// Returns the start of the buffer.
4141
const char *getBufferBegin() {
  // Expose the first character of the buffer currently being lexed.
  const char *bufferBegin = curBuffer.data();
  return bufferBegin;
}
4242

43+
/// Returns the end of the buffer.
44+
const char *getBufferEnd() {
  // Callers compare their current position against this pointer to detect
  // that the buffer has been fully consumed.
  const char *bufferEnd = curBuffer.end();
  return bufferEnd;
}
45+
4346
/// Return the code completion location of the lexer, or nullptr if there is
4447
/// none.
4548
const char *getCodeCompleteLoc() const {
  // Hand back the stored completion location unchanged.
  const char *completionLoc = codeCompleteLoc;
  return completionLoc;
}

0 commit comments

Comments
 (0)