-
Notifications
You must be signed in to change notification settings - Fork 14.6k
[mlir] Make parser not rely on terminating null. #151007
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Used in a follow-up to parse slices of a buffer.
@llvm/pr-subscribers-mlir-core — Author: Jacques Pienaar (jpienaar). Changes: Used in a follow-up to parse slices of a buffer. Full diff: https://github.com/llvm/llvm-project/pull/151007.diff — 3 files affected:
diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 9f4a87a6a02de..8b14e71118c3a 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -89,6 +89,7 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
nestedPunctuation.pop_back();
return success();
};
+ const char *curBufferEnd = state.lex.getBufferEnd();
do {
// Handle code completions, which may appear in the middle of the symbol
// body.
@@ -98,6 +99,12 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
break;
}
+ if (curBufferEnd == curPtr) {
+ if (!nestedPunctuation.empty())
+ return emitPunctError();
+ return emitError("unexpected nul or EOF in pretty dialect name");
+ }
+
char c = *curPtr++;
switch (c) {
case '\0':
diff --git a/mlir/lib/AsmParser/Lexer.cpp b/mlir/lib/AsmParser/Lexer.cpp
index 751bd63e537f8..8f53529823e23 100644
--- a/mlir/lib/AsmParser/Lexer.cpp
+++ b/mlir/lib/AsmParser/Lexer.cpp
@@ -37,6 +37,18 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context,
AsmParserCodeCompleteContext *codeCompleteContext)
: sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
auto bufferID = sourceMgr.getMainFileID();
+
+ // Check to see if the main buffer contains the last buffer, and if so the
+ // last buffer should be used as main file for parsing.
+ if (sourceMgr.getNumBuffers() > 1) {
+ unsigned lastFileID = sourceMgr.getNumBuffers();
+ const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
+ const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
+ if (main->getBufferStart() <= last->getBufferStart() &&
+ main->getBufferEnd() >= last->getBufferEnd()) {
+ bufferID = lastFileID;
+ }
+ }
curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
curPtr = curBuffer.begin();
@@ -71,6 +83,7 @@ Token Lexer::emitError(const char *loc, const Twine &message) {
}
Token Lexer::lexToken() {
+ const char *curBufferEnd = curBuffer.end();
while (true) {
const char *tokStart = curPtr;
@@ -78,6 +91,9 @@ Token Lexer::lexToken() {
if (tokStart == codeCompleteLoc)
return formToken(Token::code_complete, tokStart);
+ if (tokStart == curBufferEnd)
+ return formToken(Token::eof, tokStart);
+
// Lex the next token.
switch (*curPtr++) {
default:
@@ -102,7 +118,7 @@ Token Lexer::lexToken() {
case 0:
// This may either be a nul character in the source file or may be the EOF
// marker that llvm::MemoryBuffer guarantees will be there.
- if (curPtr - 1 == curBuffer.end())
+ if (curPtr - 1 == curBufferEnd)
return formToken(Token::eof, tokStart);
continue;
@@ -259,7 +275,11 @@ void Lexer::skipComment() {
assert(*curPtr == '/');
++curPtr;
+ const char *curBufferEnd = curBuffer.end();
while (true) {
+ if (curPtr == curBufferEnd)
+ return;
+
switch (*curPtr++) {
case '\n':
case '\r':
@@ -267,7 +287,7 @@ void Lexer::skipComment() {
return;
case 0:
// If this is the end of the buffer, end the comment.
- if (curPtr - 1 == curBuffer.end()) {
+ if (curPtr - 1 == curBufferEnd) {
--curPtr;
return;
}
@@ -405,6 +425,7 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
Token Lexer::lexString(const char *tokStart) {
assert(curPtr[-1] == '"');
+ const char *curBufferEnd = curBuffer.end();
while (true) {
// Check to see if there is a code completion location within the string. In
// these cases we generate a completion location and place the currently
@@ -419,7 +440,7 @@ Token Lexer::lexString(const char *tokStart) {
case 0:
// If this is a random nul character in the middle of a string, just
// include it. If it is the end of file, then it is an error.
- if (curPtr - 1 != curBuffer.end())
+ if (curPtr - 1 != curBufferEnd)
continue;
[[fallthrough]];
case '\n':
diff --git a/mlir/lib/AsmParser/Lexer.h b/mlir/lib/AsmParser/Lexer.h
index 4085a9b73854b..670444eb1f5b4 100644
--- a/mlir/lib/AsmParser/Lexer.h
+++ b/mlir/lib/AsmParser/Lexer.h
@@ -40,6 +40,9 @@ class Lexer {
/// Returns the start of the buffer.
const char *getBufferBegin() { return curBuffer.data(); }
+ /// Returns the end of the buffer.
+ const char *getBufferEnd() { return curBuffer.end(); }
+
/// Return the code completion location of the lexer, or nullptr if there is
/// none.
const char *getCodeCompleteLoc() const { return codeCompleteLoc; }
|
@llvm/pr-subscribers-mlir — Author: Jacques Pienaar (jpienaar). Changes: Used in a follow-up to parse slices of a buffer. Full diff: https://github.com/llvm/llvm-project/pull/151007.diff — 3 files affected:
diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 9f4a87a6a02de..8b14e71118c3a 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -89,6 +89,7 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
nestedPunctuation.pop_back();
return success();
};
+ const char *curBufferEnd = state.lex.getBufferEnd();
do {
// Handle code completions, which may appear in the middle of the symbol
// body.
@@ -98,6 +99,12 @@ ParseResult Parser::parseDialectSymbolBody(StringRef &body,
break;
}
+ if (curBufferEnd == curPtr) {
+ if (!nestedPunctuation.empty())
+ return emitPunctError();
+ return emitError("unexpected nul or EOF in pretty dialect name");
+ }
+
char c = *curPtr++;
switch (c) {
case '\0':
diff --git a/mlir/lib/AsmParser/Lexer.cpp b/mlir/lib/AsmParser/Lexer.cpp
index 751bd63e537f8..8f53529823e23 100644
--- a/mlir/lib/AsmParser/Lexer.cpp
+++ b/mlir/lib/AsmParser/Lexer.cpp
@@ -37,6 +37,18 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context,
AsmParserCodeCompleteContext *codeCompleteContext)
: sourceMgr(sourceMgr), context(context), codeCompleteLoc(nullptr) {
auto bufferID = sourceMgr.getMainFileID();
+
+ // Check to see if the main buffer contains the last buffer, and if so the
+ // last buffer should be used as main file for parsing.
+ if (sourceMgr.getNumBuffers() > 1) {
+ unsigned lastFileID = sourceMgr.getNumBuffers();
+ const llvm::MemoryBuffer *main = sourceMgr.getMemoryBuffer(bufferID);
+ const llvm::MemoryBuffer *last = sourceMgr.getMemoryBuffer(lastFileID);
+ if (main->getBufferStart() <= last->getBufferStart() &&
+ main->getBufferEnd() >= last->getBufferEnd()) {
+ bufferID = lastFileID;
+ }
+ }
curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
curPtr = curBuffer.begin();
@@ -71,6 +83,7 @@ Token Lexer::emitError(const char *loc, const Twine &message) {
}
Token Lexer::lexToken() {
+ const char *curBufferEnd = curBuffer.end();
while (true) {
const char *tokStart = curPtr;
@@ -78,6 +91,9 @@ Token Lexer::lexToken() {
if (tokStart == codeCompleteLoc)
return formToken(Token::code_complete, tokStart);
+ if (tokStart == curBufferEnd)
+ return formToken(Token::eof, tokStart);
+
// Lex the next token.
switch (*curPtr++) {
default:
@@ -102,7 +118,7 @@ Token Lexer::lexToken() {
case 0:
// This may either be a nul character in the source file or may be the EOF
// marker that llvm::MemoryBuffer guarantees will be there.
- if (curPtr - 1 == curBuffer.end())
+ if (curPtr - 1 == curBufferEnd)
return formToken(Token::eof, tokStart);
continue;
@@ -259,7 +275,11 @@ void Lexer::skipComment() {
assert(*curPtr == '/');
++curPtr;
+ const char *curBufferEnd = curBuffer.end();
while (true) {
+ if (curPtr == curBufferEnd)
+ return;
+
switch (*curPtr++) {
case '\n':
case '\r':
@@ -267,7 +287,7 @@ void Lexer::skipComment() {
return;
case 0:
// If this is the end of the buffer, end the comment.
- if (curPtr - 1 == curBuffer.end()) {
+ if (curPtr - 1 == curBufferEnd) {
--curPtr;
return;
}
@@ -405,6 +425,7 @@ Token Lexer::lexPrefixedIdentifier(const char *tokStart) {
Token Lexer::lexString(const char *tokStart) {
assert(curPtr[-1] == '"');
+ const char *curBufferEnd = curBuffer.end();
while (true) {
// Check to see if there is a code completion location within the string. In
// these cases we generate a completion location and place the currently
@@ -419,7 +440,7 @@ Token Lexer::lexString(const char *tokStart) {
case 0:
// If this is a random nul character in the middle of a string, just
// include it. If it is the end of file, then it is an error.
- if (curPtr - 1 != curBuffer.end())
+ if (curPtr - 1 != curBufferEnd)
continue;
[[fallthrough]];
case '\n':
diff --git a/mlir/lib/AsmParser/Lexer.h b/mlir/lib/AsmParser/Lexer.h
index 4085a9b73854b..670444eb1f5b4 100644
--- a/mlir/lib/AsmParser/Lexer.h
+++ b/mlir/lib/AsmParser/Lexer.h
@@ -40,6 +40,9 @@ class Lexer {
/// Returns the start of the buffer.
const char *getBufferBegin() { return curBuffer.data(); }
+ /// Returns the end of the buffer.
+ const char *getBufferEnd() { return curBuffer.end(); }
+
/// Return the code completion location of the lexer, or nullptr if there is
/// none.
const char *getCodeCompleteLoc() const { return codeCompleteLoc; }
|
joker-eph
approved these changes
Jul 28, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Used in a follow-up to parse slices of a buffer.