Open
Description
While trying to modernize Arduino-avr core, especially the Arduino library, I wrote several tests for the String class, because its implementation looked suspicious in several places. The worst situation I found was in String::replace, which is causing UB by calling memcpy and strcpy with overlapping buffers and also completely broken in the implementation when the replacement string contains the search substring as a prefix.
Test case:
// from https://stackoverflow.com/questions/1494399/how-do-i-search-find-and-replace-in-a-standard-string
// for the following test...
void myReplace(std::string& str,
const std::string& oldStr,
const std::string& newStr)
{
std::string::size_type pos = 0u;
while((pos = str.find(oldStr, pos)) != std::string::npos){
str.replace(pos, oldStr.length(), newStr);
pos += newStr.length();
}
}
void replacementsWithFoundAsSubstringInReplacement(){
String s{"AAAAAAH!"};
s.reserve(1024); // to avoid UB by memory overwriting...
std::string ss{s.c_str()};
ASSERT_EQUAL(ss,s.c_str());
myReplace(ss,"AA","AAARGH!");
ASSERT_EQUAL("AAARGH!AAARGH!AAARGH!H!",ss);
s.replace("AA","AAARGH!");
// ASSERT_EQUAL(ss,s.c_str()); // fails...
ASSERT_EQUAL("AAARGH!AARGH!AARGH!AARGH!AARGH!H!",s.c_str()); // IMHO this is not intended....
}
I already replaced the memcpy UB with memmove instead, so I test against the following version and the original one in Arduino-API and both fail similarly...
void String::replace(const String& find, const String& replace)
{
if (len == 0 || find.len == 0) return;
int diff = replace.len - find.len;
char *readFrom = buffer;
char *foundAt;
if (diff == 0) {
while ((foundAt = strstr(readFrom, find.buffer)) != nullptr) {
//assert(foundAt != replace.buffer); // triggers
memmove(foundAt, replace.buffer, replace.len); // memcpy potential UB, if this == &replace
readFrom = foundAt + replace.len;
}
} else if (diff < 0) {
char *writeTo = buffer;
while ((foundAt = strstr(readFrom, find.buffer)) != nullptr) {
auto const n = static_cast<size_t>(foundAt - readFrom);
//assert(writeTo != readFrom); // triggers
memmove(writeTo, readFrom, n); // memcpy also UB if first in
writeTo += n;
assert(writeTo != replace.buffer); // this can not happen, even if same string
memcpy(writeTo, replace.buffer, replace.len);
writeTo += replace.len;
readFrom = foundAt + find.len;
len += diff;
}
//assert(writeTo != readFrom); // triggers
if (writeTo != readFrom) strcpy(writeTo, readFrom); // causes UB if writeTo and readFrom are equal
} else {
unsigned int size = len; // compute size needed for result
while ((foundAt = strstr(readFrom, find.buffer)) != nullptr) {
readFrom = foundAt + find.len;
size += diff;
}
if (size == len) return; // nothing found
if (size > capacity() && !changeBuffer(size)) return; // XXX: tell user!
int index = len - 1;
// the following logic seems to be wrong when part of find is a substring of replace..... causing UB
while (index >= 0 && (index = lastIndexOf(find, index)) >= 0) { // PS: that used to be even more inefficient!
readFrom = buffer + index + find.len;
memmove(readFrom + diff, readFrom, len - (readFrom - buffer));
len += diff;
buffer[len] = 0;
//assert(buffer+index != replace.buffer); // triggers therefore the following incurs UB
memmove(buffer + index, replace.buffer, replace.len); // memcpy UB
index--;
}
}
}
Metadata
Metadata
Assignees
Labels
No labels