Skip to content

Commit d8ea250

Browse files
shoumikhinfacebook-github-bot
authored andcommitted
Fix message truncating logic to respect UTF8 encoding.
Summary: . Differential Revision: D82681904
1 parent cb42db2 commit d8ea250

File tree

2 files changed

+95
-9
lines changed

2 files changed

+95
-9
lines changed

runtime/platform/log.cpp

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,37 @@ static_assert(
5959
kLevelToPal[size_t(LogLevel::Fatal)] == et_pal_log_level_t::kFatal,
6060
"");
6161

62+
/**
 * Returns the length, in bytes, of the longest prefix of a byte buffer that
 * is well-formed UTF-8.
 *
 * Scanning stops at the first byte sequence that is either ill-formed or cut
 * off by the end of the buffer, so the returned length always falls on a
 * code point boundary. Validation follows the well-formed byte sequence table
 * of the Unicode Standard / RFC 3629: besides checking the lead byte and the
 * `10xxxxxx` shape of continuation bytes, it rejects overlong encodings,
 * UTF-16 surrogates (U+D800..U+DFFF) and code points above U+10FFFF.
 *
 * @param[in] bytes Buffer to inspect; may be null. Does not need to be
 *     NUL-terminated.
 * @param[in] length Number of bytes available in `bytes`.
 * @return Number of leading bytes forming well-formed UTF-8; 0 when `bytes`
 *     is null or `length` is 0.
 */
static inline size_t get_valid_utf8_prefix_length(const char* bytes, size_t length) {
  if (!bytes || length == 0) {
    return 0;
  }
  const auto* data = reinterpret_cast<const unsigned char*>(bytes);
  size_t index = 0;
  while (index < length) {
    const unsigned char lead_byte = data[index];

    // Total sequence length implied by the lead byte; 0 means "invalid lead".
    size_t sequence_length = 0;
    // Allowed range for the first continuation byte. It is narrower than
    // 0x80..0xBF for a few lead bytes; this is what excludes overlong forms,
    // surrogates and values beyond U+10FFFF.
    unsigned char second_min = 0x80;
    unsigned char second_max = 0xBF;

    if (lead_byte < 0x80) {
      sequence_length = 1;
    } else if (lead_byte >= 0xC2 && lead_byte <= 0xDF) {
      // 0xC0 and 0xC1 could only encode overlong forms of ASCII.
      sequence_length = 2;
    } else if ((lead_byte & 0xF0) == 0xE0) {
      sequence_length = 3;
      if (lead_byte == 0xE0) {
        second_min = 0xA0; // Below this the encoding is overlong.
      } else if (lead_byte == 0xED) {
        second_max = 0x9F; // Above this lies the surrogate range.
      }
    } else if (lead_byte >= 0xF0 && lead_byte <= 0xF4) {
      sequence_length = 4;
      if (lead_byte == 0xF0) {
        second_min = 0x90; // Below this the encoding is overlong.
      } else if (lead_byte == 0xF4) {
        second_max = 0x8F; // Above this the value exceeds U+10FFFF.
      }
    }

    // `index < length` holds here, so the subtraction cannot wrap around.
    if (sequence_length == 0 || sequence_length > length - index) {
      return index;
    }

    if (sequence_length > 1) {
      const unsigned char second_byte = data[index + 1];
      if (second_byte < second_min || second_byte > second_max) {
        return index;
      }
      for (size_t offset = 2; offset < sequence_length; ++offset) {
        if ((data[index + offset] & 0xC0) != 0x80) {
          return index;
        }
      }
    }

    index += sequence_length;
  }
  return index;
}
92+
6293
/**
6394
* Log a string message.
6495
*
@@ -84,20 +115,23 @@ void vlogf(
84115

85116
// Maximum length of a log message.
86117
static constexpr size_t kMaxLogMessageLength = 256;
87-
char buf[kMaxLogMessageLength];
88-
size_t len = vsnprintf(buf, kMaxLogMessageLength, format, args);
89-
if (len >= kMaxLogMessageLength - 1) {
90-
buf[kMaxLogMessageLength - 2] = '$';
91-
len = kMaxLogMessageLength - 1;
92-
}
93-
buf[kMaxLogMessageLength - 1] = 0;
118+
char buffer[kMaxLogMessageLength];
119+
120+
const auto write_count = vsnprintf(buffer, kMaxLogMessageLength, format, args);
121+
const size_t used_length = (write_count < 0)
122+
? 0
123+
: (write_count >= static_cast<int>(kMaxLogMessageLength)
124+
? kMaxLogMessageLength - 1
125+
: static_cast<size_t>(write_count));
126+
const auto valid_length = get_valid_utf8_prefix_length(buffer, used_length);
127+
buffer[valid_length] = '\0';
94128

95-
et_pal_log_level_t pal_level = (level < LogLevel::NumLevels)
129+
const auto pal_level = (level < LogLevel::NumLevels)
96130
? kLevelToPal[size_t(level)]
97131
: et_pal_log_level_t::kUnknown;
98132

99133
pal_emit_log_message(
100-
timestamp, pal_level, filename, function, line, buf, len);
134+
timestamp, pal_level, filename, function, line, buffer, valid_length);
101135

102136
#endif // ET_LOG_ENABLED
103137
}

runtime/platform/test/logging_test.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,44 @@
99
#include <gtest/gtest.h>
1010

1111
#include <executorch/runtime/platform/log.h>
12+
#include <executorch/runtime/platform/platform.h>
1213
#include <executorch/runtime/platform/runtime.h>
1314

1415
using namespace executorch::runtime;
1516

17+
// A global variable to capture the output from our mock logger.
18+
static std::string captured_log_message;
19+
20+
/**
21+
* @brief A mock implementation of the PAL logging function.
22+
*
23+
* This function will be linked into the test instead of the default platform
24+
* logger. It captures the formatted message into a global string so that
25+
* tests can assert its content.
26+
*/
27+
extern "C" void et_pal_emit_log_message(
28+
et_timestamp_t /*timestamp*/,
29+
et_pal_log_level_t /*level*/,
30+
const char* /*filename*/,
31+
const char* /*function*/,
32+
size_t /*line*/,
33+
const char* message,
34+
size_t /*length*/) {
35+
captured_log_message = message;
36+
}
37+
1638
// Test fixture for the logging tests: runs runtime_init() once for the whole
// suite and clears captured_log_message before every individual test.
class LoggingTest : public ::testing::Test {
1739
public:
1840
static void SetUpTestSuite() {
1941
// Initialize runtime.
2042
runtime_init();
2143
}
44+
45+
protected:
46+
// Clear the captured message before each test.
47+
void SetUp() override {
48+
captured_log_message.clear();
49+
}
2250
};
2351

2452
TEST_F(LoggingTest, LogLevels) {
@@ -31,3 +59,27 @@ TEST_F(LoggingTest, LogLevels) {
3159
// Smoke test: emits one Info-level message with a formatted integer argument.
// It makes no assertions on the emitted output.
TEST_F(LoggingTest, LogFormatting) {
3260
ET_LOG(Info, "Sample log with integer: %u", 100);
3361
}
62+
63+
TEST_F(LoggingTest, Utf8Truncation) {
64+
{
65+
// Create a prefix that ends right before the buffer limit.
66+
// 253 'A's + the first 2 bytes of "€" will fill the 255-byte content area.
67+
std::string prefix(253, 'A');
68+
std::string multi_byte_char = "€"; // 3 bytes: 0xE2 0x82 0xAC
69+
std::string suffix = "_SHOULD_BE_CUT";
70+
71+
ET_LOG(Info, "%s%s%s", prefix.c_str(), multi_byte_char.c_str(), suffix.c_str());
72+
73+
EXPECT_EQ(captured_log_message, prefix);
74+
}
75+
{
76+
// 252 'B's + the first 3 bytes of "👍" will fill the 255-byte content area.
77+
std::string prefix(252, 'B');
78+
std::string multi_byte_char = "👍"; // 4 bytes: 0xF0 0x9F 0x91 0x8D
79+
std::string suffix = "_SHOULD_BE_CUT";
80+
81+
ET_LOG(Info, "%s%s%s", prefix.c_str(), multi_byte_char.c_str(), suffix.c_str());
82+
83+
EXPECT_EQ(captured_log_message, prefix);
84+
}
85+
}

0 commit comments

Comments
 (0)