From d8e47de1d6b8718bb53a527420d5da0dc6d532c1 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 12:18:53 +0200 Subject: [PATCH 01/12] Add instruction location info to eof assembly --- libevmasm/Assembly.cpp | 64 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 9dd203a0f802..b16fcb5b0d3a 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -53,6 +53,54 @@ using namespace solidity::evmasm; using namespace solidity::langutil; using namespace solidity::util; +namespace +{ + +/// Produces instruction location info in RAII style. When an assembly instruction is added to the bytecode, +/// this class can be instantiated in that scope. It will record the current bytecode size (before addition) +/// and, at destruction time, record the new bytecode size. This information is then added to an external +/// instruction locations vector. +/// If the instruction decomposes into multiple individual evm instructions, `emit` can be +/// called for all but the last one (which will be emitted by the destructor). +class InstructionLocationEmitter +{ +public: + InstructionLocationEmitter( + std::vector& _instructionLocations, + bytes const& _bytecode, + size_t const _assemblyItemIndex + ): + m_instructionLocations(_instructionLocations), + m_bytecode(_bytecode), + m_assemblyItemIndex(_assemblyItemIndex), + m_instructionLocationStart(_bytecode.size()) + {} + + ~InstructionLocationEmitter() + { + emit(); + } + + void emit() + { + auto const end = m_bytecode.size(); + m_instructionLocations.emplace_back(LinkerObject::InstructionLocation{ + .start = m_instructionLocationStart, + .end = end, + .assemblyItemIndex = m_assemblyItemIndex + }); + m_instructionLocationStart = end; + } + +private: + std::vector& m_instructionLocations; + bytes const& m_bytecode; + size_t const m_assemblyItemIndex; + size_t m_instructionLocationStart; +}; + +} + std::map> Assembly::s_sharedSourceNames; AssemblyItem const& Assembly::append(AssemblyItem _i) @@ -1606,9 +1654,17 @@ LinkerObject const& Assembly::assembleEOF() const for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) { auto const sectionStart = ret.bytecode.size(); + + std::vector instructionLocations; + instructionLocations.reserve(codeSection.items.size()); + solAssert(!codeSection.items.empty(), "Empty code section."); - for (AssemblyItem const& item: codeSection.items) + + for (auto const& [assemblyItemIndex, item]: codeSection.items | ranges::views::enumerate) { + // collect instruction locations via side effects + InstructionLocationEmitter instructionLocationEmitter {instructionLocations, ret.bytecode, assemblyItemIndex}; + // store position of the invalid jump destination if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) m_tagPositionsInBytecode[0] = ret.bytecode.size(); @@ -1724,6 +1780,12 @@ LinkerObject const& Assembly::assembleEOF() const "Code section too large for EOF." ); setBigEndianUint16(ret.bytecode, codeSectionSizePositions[codeSectionIndex], ret.bytecode.size() - sectionStart); + + ret.codeSectionLocations.emplace_back(LinkerObject::CodeSectionLocation{ + .start = sectionStart, + .end = ret.bytecode.size(), + .instructionLocations = std::move(instructionLocations) + }); } for (auto const& [refPos, tagId]: tagRef) From ef3311aea3b249215014a9e68a050747b6fc07a9 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:54:11 +0200 Subject: [PATCH 02/12] Refactor legacy assemble to also use RAII instruction location construction --- libevmasm/Assembly.cpp | 158 +++++++++++++++-------------------------- 1 file changed, 57 insertions(+), 101 deletions(-) diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index b16fcb5b0d3a..96141346bf4e 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -1329,22 +1329,12 @@ LinkerObject const& Assembly::assembleLegacy() const uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); LinkerObject::CodeSectionLocation codeSectionLocation; + codeSectionLocation.instructionLocations.reserve(items.size()); codeSectionLocation.start = 0; - size_t assemblyItemIndex = 0; - auto assembleInstruction = [&](auto&& _addInstruction) { - size_t start = ret.bytecode.size(); - _addInstruction(); - size_t end = ret.bytecode.size(); - codeSectionLocation.instructionLocations.emplace_back( - LinkerObject::InstructionLocation{ - .start = start, - .end = end, - .assemblyItemIndex = assemblyItemIndex - } - ); - }; - for (AssemblyItem const& item: items) + for (auto const& [assemblyItemIndex, item]: items | ranges::views::enumerate) { + // collect instruction locations via side effects + InstructionLocationEmitter instructionLocationEmitter(codeSectionLocation.instructionLocations, ret.bytecode, assemblyItemIndex); // store position of the invalid jump destination if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) m_tagPositionsInBytecode[0] = ret.bytecode.size(); @@ -1352,140 +1342,106 @@ LinkerObject const& Assembly::assembleLegacy() const switch (item.type()) { case Operation: - assembleInstruction([&](){ - ret.bytecode += assembleOperation(item); - }); + ret.bytecode += assembleOperation(item); break; case Push: - assembleInstruction([&](){ - ret.bytecode += assemblePush(item); - }); + ret.bytecode += assemblePush(item); break; case PushTag: - { - assembleInstruction([&](){ - ret.bytecode.push_back(tagPush); - tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); - ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); - }); + ret.bytecode.push_back(tagPush); + tagRefs[ret.bytecode.size()] = item.splitForeignPushTag(); + ret.bytecode.resize(ret.bytecode.size() + bytesPerTag); break; - } case PushData: - assembleInstruction([&]() { - ret.bytecode.push_back(dataRefPush); - dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + ret.bytecode.push_back(dataRefPush); + dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSub: - assembleInstruction([&]() { - assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); - ret.bytecode.push_back(dataRefPush); - subRefs.insert(std::make_pair(static_cast(item.data()), ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); + ret.bytecode.push_back(dataRefPush); + subRefs.insert(std::make_pair(static_cast(item.data()), ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; case PushSubSize: { - assembleInstruction([&](){ - assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); - auto s = subAssemblyById(static_cast(item.data()))->assemble().bytecode.size(); - item.setPushedValue(u256(s)); - unsigned b = std::max(1, numberEncodingSize(s)); - ret.bytecode.push_back(static_cast(pushInstruction(b))); - ret.bytecode.resize(ret.bytecode.size() + b); - bytesRef byr(&ret.bytecode.back() + 1 - b, b); - toBigEndian(s, byr); - }); + assertThrow(item.data() <= std::numeric_limits::max(), AssemblyException, ""); + auto s = subAssemblyById(static_cast(item.data()))->assemble().bytecode.size(); + item.setPushedValue(u256(s)); + unsigned b = std::max(1, numberEncodingSize(s)); + ret.bytecode.push_back(static_cast(pushInstruction(b))); + ret.bytecode.resize(ret.bytecode.size() + b); + bytesRef byr(&ret.bytecode.back() + 1 - b, b); + toBigEndian(s, byr); break; } case PushProgramSize: - { - assembleInstruction([&](){ - ret.bytecode.push_back(dataRefPush); - sizeRefs.push_back(static_cast(ret.bytecode.size())); - ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); - }); + ret.bytecode.push_back(dataRefPush); + sizeRefs.push_back(static_cast(ret.bytecode.size())); + ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef); break; - } case PushLibraryAddress: { - assembleInstruction([&]() { - auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); - ret.bytecode += bytecode; - ret.linkReferences.insert(linkRef); - }); + auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size()); + ret.bytecode += bytecode; + ret.linkReferences.insert(linkRef); break; } case PushImmutable: - assembleInstruction([&]() { - ret.bytecode.push_back(static_cast(Instruction::PUSH32)); - // Maps keccak back to the "identifier" std::string of that immutable. - ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); - // Record the bytecode offset of the PUSH32 argument. - ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); - // Advance bytecode by 32 bytes (default initialized). - ret.bytecode.resize(ret.bytecode.size() + 32); - }); + ret.bytecode.push_back(static_cast(Instruction::PUSH32)); + // Maps keccak back to the "identifier" std::string of that immutable. + ret.immutableReferences[item.data()].first = m_immutables.at(item.data()); + // Record the bytecode offset of the PUSH32 argument. + ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size()); + // Advance bytecode by 32 bytes (default initialized). + ret.bytecode.resize(ret.bytecode.size() + 32); break; case VerbatimBytecode: ret.bytecode += assembleVerbatimBytecode(item); break; case AssignImmutable: { + // this decomposes into multiple evm instructions, so we manually call emit // Expect 2 elements on stack (source, dest_base) auto const& offsets = immutableReferencesBySub[item.data()].second; for (size_t i = 0; i < offsets.size(); ++i) { if (i != offsets.size() - 1) { - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::DUP2)); - }); + ret.bytecode.push_back(static_cast(Instruction::DUP2)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::DUP2)); + instructionLocationEmitter.emit(); } - assembleInstruction([&]() { - // TODO: should we make use of the constant optimizer methods for pushing the offsets? - bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); - ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); - ret.bytecode += offsetBytes; - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::ADD)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::MSTORE)); - }); + // TODO: should we make use of the constant optimizer methods for pushing the offsets? + bytes offsetBytes = toCompactBigEndian(u256(offsets[i])); + ret.bytecode.push_back(static_cast(pushInstruction(static_cast(offsetBytes.size())))); + ret.bytecode += offsetBytes; + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::ADD)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::MSTORE)); + // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter } if (offsets.empty()) { - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::POP)); - }); - assembleInstruction([&]() { - ret.bytecode.push_back(uint8_t(Instruction::POP)); - }); + ret.bytecode.push_back(static_cast(Instruction::POP)); + instructionLocationEmitter.emit(); + ret.bytecode.push_back(static_cast(Instruction::POP)); + // no emit needed here, it's taken care of by the destructor of instructionLocationEmitter } immutableReferencesBySub.erase(item.data()); break; } case PushDeployTimeAddress: - assembleInstruction([&]() { - ret.bytecode += assemblePushDeployTimeAddress(); - }); + ret.bytecode += assemblePushDeployTimeAddress(); break; case Tag: - assembleInstruction([&](){ - ret.bytecode += assembleTag(item, ret.bytecode.size(), true); - }); + ret.bytecode += assembleTag(item, ret.bytecode.size(), true); break; default: solAssert(false, "Unexpected opcode while assembling."); } - - ++assemblyItemIndex; } codeSectionLocation.end = ret.bytecode.size(); From 1203543ad053f40c15b7cc556487f47e4c43e51d Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Sat, 12 Apr 2025 07:30:38 +0200 Subject: [PATCH 03/12] Ethdebug requires assembly instance --- libevmasm/Ethdebug.cpp | 2 +- libevmasm/Ethdebug.h | 2 +- libsolidity/interface/CompilerStack.cpp | 5 ++++- libyul/YulStack.cpp | 4 ++-- test/libevmasm/Assembler.cpp | 4 ++-- 5 files changed, 10 insertions(+), 7 deletions(-) diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 6e635ef57952..0251600066ce 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -77,7 +77,7 @@ Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerO } // anonymous namespace -Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject) +Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) { Json result = Json::object(); result["contract"] = Json::object(); diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h index 72ac16037969..fdb693b190c4 100644 --- a/libevmasm/Ethdebug.h +++ b/libevmasm/Ethdebug.h @@ -27,7 +27,7 @@ namespace solidity::evmasm::ethdebug { // returns ethdebug/format/program. -Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject); +Json program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject); // returns ethdebug/format/info/resources Json resources(std::vector const& _sources, std::string const& _version); diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 1b0a299f7291..975ed3c1982d 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -1216,8 +1216,11 @@ Json CompilerStack::ethdebug(Contract const& _contract, bool _runtime) const solUnimplementedAssert(!isExperimentalSolidity()); evmasm::LinkerObject const& object = _runtime ? _contract.runtimeObject : _contract.object; std::shared_ptr const& assembly = _runtime ? _contract.evmRuntimeAssembly : _contract.evmAssembly; + if (!assembly) + return {}; + solAssert(sourceIndices().contains(_contract.contract->sourceUnitName())); - return evmasm::ethdebug::program(_contract.contract->name(), sourceIndices()[_contract.contract->sourceUnitName()], assembly.get(), object); + return evmasm::ethdebug::program(_contract.contract->name(), sourceIndices()[_contract.contract->sourceUnitName()], *assembly, object); } bytes CompilerStack::cborMetadata(std::string const& _contractName, bool _forIR) const diff --git a/libyul/YulStack.cpp b/libyul/YulStack.cpp index 5adb8d6d5c9a..525269575b6a 100644 --- a/libyul/YulStack.cpp +++ b/libyul/YulStack.cpp @@ -275,14 +275,14 @@ YulStack::assembleWithDeployed(std::optional _deployName) ); } if (debugInfoSelection().ethdebug) - creationObject.ethdebug = evmasm::ethdebug::program(creationObject.assembly->name(), 0, creationObject.assembly.get(), *creationObject.bytecode.get()); + creationObject.ethdebug = evmasm::ethdebug::program(creationObject.assembly->name(), 0, *creationObject.assembly, *creationObject.bytecode); if (deployedAssembly) { deployedObject.bytecode = std::make_shared(deployedAssembly->assemble()); deployedObject.assembly = deployedAssembly; if (debugInfoSelection().ethdebug) - deployedObject.ethdebug = evmasm::ethdebug::program(deployedObject.assembly->name(), 0, deployedObject.assembly.get(), *deployedObject.bytecode.get()); + deployedObject.ethdebug = evmasm::ethdebug::program(deployedObject.assembly->name(), 0, *deployedObject.assembly, *deployedObject.bytecode); solAssert(deployedAssembly->codeSections().size() == 1); deployedObject.sourceMappings = std::make_unique( evmasm::AssemblyItem::computeSourceMapping( diff --git a/test/libevmasm/Assembler.cpp b/test/libevmasm/Assembler.cpp index 6a41f7a7394c..32855cfbc211 100644 --- a/test/libevmasm/Assembler.cpp +++ b/test/libevmasm/Assembler.cpp @@ -434,7 +434,7 @@ BOOST_AUTO_TEST_CASE(ethdebug_program_last_instruction_with_immediate_arguments) assembly.append(AssemblyItem{0x11223344}); LinkerObject output = assembly.assemble(); - Json const program = ethdebug::program("", 0, &assembly, output); + Json const program = ethdebug::program("", 0, assembly, output); BOOST_REQUIRE(program["instructions"].size() == 1); BOOST_REQUIRE(program["instructions"][0]["operation"]["mnemonic"] == "PUSH4"); BOOST_REQUIRE(program["instructions"][0]["operation"]["arguments"][0] == "0x11223344"); @@ -445,7 +445,7 @@ BOOST_AUTO_TEST_CASE(ethdebug_program_last_instruction_with_immediate_arguments) assembly.append(AssemblyItem{0x1122334455}); LinkerObject output = assembly.assemble(); - Json const program = ethdebug::program("", 0, &assembly, output); + Json const program = ethdebug::program("", 0, assembly, output); BOOST_REQUIRE(program["instructions"].size() == 2); BOOST_REQUIRE(program["instructions"][0]["operation"]["mnemonic"] == "PUSH0"); BOOST_REQUIRE(!program["instructions"][0]["operation"].contains("arguments")); From 0df6200491817d907d93417de245d74cfc0ad96c Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Tue, 8 Apr 2025 16:41:37 +0200 Subject: [PATCH 04/12] Ethdebug instructions output over multiple code sections --- Changelog.md | 2 +- libevmasm/Ethdebug.cpp | 85 ++++++++++++++++++++++++++++-------------- 2 files changed, 57 insertions(+), 30 deletions(-) diff --git a/Changelog.md b/Changelog.md index 967e548814ef..233a1cb2f489 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,7 +4,7 @@ Language Features: Compiler Features: - +* Ethdebug: Experimental support for instructions and source locations under EOF. Bugfixes: diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 0251600066ce..0a4c56ddd0a0 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -18,6 +18,8 @@ #include +#include + using namespace solidity; using namespace solidity::evmasm; using namespace solidity::evmasm::ethdebug; @@ -25,25 +27,35 @@ using namespace solidity::evmasm::ethdebug; namespace { -Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId, size_t const _codeSectionIndex) { - solUnimplementedAssert(_assembly.eofVersion() == std::nullopt, "ethdebug does not yet support EOF."); - solUnimplementedAssert(_assembly.codeSections().size() == 1, "ethdebug does not yet support multiple code-sections."); - for (auto const& instruction: _assembly.codeSections()[0].items) - solUnimplementedAssert(instruction.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug."); - - solAssert(_linkerObject.codeSectionLocations.size() == 1); - solAssert(_linkerObject.codeSectionLocations[0].end <= _linkerObject.bytecode.size()); - Json instructions = Json::array(); - for (size_t i = 0; i < _linkerObject.codeSectionLocations[0].instructionLocations.size(); ++i) + solAssert(_codeSectionIndex < _linkerObject.codeSectionLocations.size()); + solAssert(_codeSectionIndex < _assembly.codeSections().size()); + auto const& locations = _linkerObject.codeSectionLocations[_codeSectionIndex]; + auto const& codeSection = _assembly.codeSections().at(_codeSectionIndex); + + std::vector instructions; + instructions.reserve(codeSection.items.size()); + + bool const codeSectionContainsVerbatim = ranges::any_of( + codeSection.items, + [](auto const& _instruction) { return _instruction.type() == VerbatimBytecode; } + ); + solUnimplementedAssert(!codeSectionContainsVerbatim, "Verbatim bytecode is currently not supported by ethdebug."); + + for (auto const& currentInstruction: locations.instructionLocations) { - LinkerObject::InstructionLocation currentInstruction = _linkerObject.codeSectionLocations[0].instructionLocations[i]; - size_t start = currentInstruction.start; - size_t end = currentInstruction.end; - size_t assemblyItemIndex = currentInstruction.assemblyItemIndex; + size_t const start = currentInstruction.start; + size_t const end = currentInstruction.end; + + // some instructions do not contribute to the bytecode + if (start == end) + continue; + + size_t const assemblyItemIndex = currentInstruction.assemblyItemIndex; solAssert(end <= _linkerObject.bytecode.size()); solAssert(start < end); - solAssert(assemblyItemIndex < _assembly.codeSections().at(0).items.size()); + solAssert(assemblyItemIndex < codeSection.items.size()); Json operation = Json::object(); operation["mnemonic"] = instructionInfo(static_cast(_linkerObject.bytecode[start]), _assembly.evmVersion()).name; static size_t constexpr instructionSize = 1; @@ -56,25 +68,40 @@ Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerO solAssert(!argumentData.empty()); operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)}); } - langutil::SourceLocation const& location = _assembly.codeSections().at(0).items.at(assemblyItemIndex).location(); - Json instruction = Json::object(); - instruction["offset"] = start; - instruction["operation"] = operation; - - instruction["context"] = Json::object(); - instruction["context"]["code"] = Json::object(); - instruction["context"]["code"]["source"] = Json::object(); - instruction["context"]["code"]["source"]["id"] = static_cast(_sourceId); - - instruction["context"]["code"]["range"] = Json::object(); - instruction["context"]["code"]["range"]["offset"] = location.start; - instruction["context"]["code"]["range"]["length"] = location.end - location.start; - instructions.emplace_back(instruction); + langutil::SourceLocation const& location = codeSection.items.at(assemblyItemIndex).location(); + instructions.emplace_back(Json{ + { "offset", start }, + {"operation", operation }, + { + "context", { + "code", { + "source", { + { "id", static_cast(_sourceId) }, + }, + "range", { + { "offset", location.start }, + { "length", location.end - location.start } + } + } + } + } + }); } return instructions; } +Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +{ + auto const numCodeSections = _assembly.codeSections().size(); + solAssert(numCodeSections == _linkerObject.codeSectionLocations.size()); + + std::vector instructionInfo; + for (size_t codeSectionIndex = 0; codeSectionIndex < numCodeSections; ++codeSectionIndex) + instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceId, codeSectionIndex); + return instructionInfo; +} + } // anonymous namespace Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) From 3ceaa34e961f9e689d7bae3d9a4c48a2f546ffb1 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Fri, 11 Apr 2025 10:48:54 +0200 Subject: [PATCH 05/12] Add ethdebug schema --- libevmasm/CMakeLists.txt | 2 + libevmasm/EthdebugSchema.cpp | 143 +++++++++++++++++++++++++++++ libevmasm/EthdebugSchema.h | 172 +++++++++++++++++++++++++++++++++++ 3 files changed, 317 insertions(+) create mode 100644 libevmasm/EthdebugSchema.cpp create mode 100644 libevmasm/EthdebugSchema.h diff --git a/libevmasm/CMakeLists.txt b/libevmasm/CMakeLists.txt index a7441b85e264..74aaeb6c293b 100644 --- a/libevmasm/CMakeLists.txt +++ b/libevmasm/CMakeLists.txt @@ -6,6 +6,8 @@ set(sources AssemblyItem.h Ethdebug.cpp Ethdebug.h + EthdebugSchema.cpp + EthdebugSchema.h EVMAssemblyStack.cpp EVMAssemblyStack.h BlockDeduplicator.cpp diff --git a/libevmasm/EthdebugSchema.cpp b/libevmasm/EthdebugSchema.cpp new file mode 100644 index 000000000000..c54b7167f53e --- /dev/null +++ b/libevmasm/EthdebugSchema.cpp @@ -0,0 +1,143 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +// SPDX-License-Identifier: GPL-3.0 + +#include + +#include +#include + +using namespace solidity; +using namespace solidity::evmasm::ethdebug; + +void schema::data::to_json(Json& _json, HexValue const& _hexValue) +{ + _json = util::toHex(_hexValue.value, util::HexPrefix::Add); +} + +void schema::data::to_json(Json& _json, Unsigned const& _unsigned) +{ + std::visit(util::GenericVisitor{ + [&](HexValue const& _hexValue) { _json = _hexValue; }, + [&](std::uint64_t const _value) { _json = _value; } + }, _unsigned.value); +} + +void schema::materials::to_json(Json& _json, ID const& _id) +{ + std::visit(util::GenericVisitor{ + [&](std::string const& _hexValue) { _json = _hexValue; }, + [&](std::uint64_t const _value) { _json = _value; } + }, _id.value); +} + +void schema::materials::to_json(Json& _json, Reference const& _source) +{ + _json["id"] = _source.id; + if (_source.type) + _json["type"] = *_source.type == Reference::Type::Compilation ? "compilation" : "source"; +} + +void schema::materials::to_json(Json& _json, SourceRange::Range const& _range) +{ + _json["length"] = _range.length; + _json["offset"] = _range.offset; +} + + +void schema::materials::to_json(Json& _json, SourceRange const& _sourceRange) +{ + _json["source"] = _sourceRange.source; + if (_sourceRange.range) + _json["range"] = *_sourceRange.range; +} + +void schema::to_json(Json& _json, Program::Contract const& _contract) +{ + if (_contract.name) + _json["name"] = *_contract.name; + _json["definition"] = _contract.definition; +} + +void schema::program::to_json(Json& _json, Context::Variable const& _contextVariable) +{ + auto const numProperties = + _contextVariable.identifier.has_value() + + _contextVariable.declaration.has_value(); + solRequire(numProperties >= 1, EthdebugException, "Context variable has no properties."); + if (_contextVariable.identifier) + { + solRequire(!_contextVariable.identifier->empty(), EthdebugException, "Variable identifier must not be empty."); + _json["identifier"] = *_contextVariable.identifier; + } + if (_contextVariable.declaration) + _json["declaration"] = *_contextVariable.declaration; +} + +void schema::program::to_json(Json& _json, Context const& _context) +{ + solRequire(_context.code.has_value() + _context.remark.has_value() + _context.variables.has_value() >= 1, EthdebugException, "Context needs >=1 properties."); + if (_context.code) + _json["code"] = *_context.code; + if (_context.variables) + { + solRequire(!_context.variables->empty(), EthdebugException, "Context variables must not be empty if provided."); + _json["variables"] = *_context.variables; + } + if (_context.remark) + _json["remark"] = *_context.remark; +} + +void schema::program::to_json(Json& _json, Instruction::Operation const& _operation) +{ + _json = { {"mnemonic", _operation.mnemonic} }; + if (!_operation.arguments.empty()) + _json["arguments"] = _operation.arguments; +} + +void schema::program::to_json(Json& _json, Instruction const& _instruction) +{ + _json["offset"] = _instruction.offset; + if (_instruction.operation) + _json["operation"] = *_instruction.operation; + if (_instruction.context) + _json["context"] = *_instruction.context; +} + +void schema::to_json(Json& _json, Program const& _program) +{ + if (_program.compilation) + _json["compilation"] = *_program.compilation; + _json["contract"] = _program.contract; + _json["environment"] = _program.environment; + if (_program.context) + _json["context"] = *_program.context; + _json["instructions"] = _program.instructions; +} + +void schema::to_json(Json& _json, Program::Environment const& _environment) +{ + switch (_environment) + { + case Program::Environment::CALL: + _json = "call"; + break; + case Program::Environment::CREATE: + _json = "create"; + break; + } +} diff --git a/libevmasm/EthdebugSchema.h b/libevmasm/EthdebugSchema.h new file mode 100644 index 000000000000..5fc8f78fd62e --- /dev/null +++ b/libevmasm/EthdebugSchema.h @@ -0,0 +1,172 @@ +/* + This file is part of solidity. + + solidity is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + solidity is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with solidity. If not, see . +*/ +// SPDX-License-Identifier: GPL-3.0 + +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace solidity::evmasm::ethdebug::schema +{ + +struct EthdebugException: virtual util::Exception {}; + +namespace data +{ + +struct HexValue +{ + bytes value; +}; + +struct Unsigned +{ + template + Unsigned(T const _value) + { + solRequire(static_cast(_value) <= std::numeric_limits::max(), EthdebugException, "Too large value."); + value = static_cast(_value); + } + template + Unsigned(T const _value) + { + solRequire(_value >= 0, EthdebugException, "NonNegativeValue got negative value."); + solRequire(static_cast>(_value) <= std::numeric_limits::max(), EthdebugException, "Too large value."); + value = static_cast(_value); + } + Unsigned(HexValue&& _value): value(std::move(_value)) {} + + std::variant value; +}; + +} + +namespace materials +{ + +struct ID +{ + std::variant value; +}; + +struct Reference +{ + enum class Type { Compilation, Source }; + ID id; + std::optional type; +}; + +struct SourceRange +{ + struct Range + { + data::Unsigned length; + data::Unsigned offset; + }; + + Reference source; + std::optional range; +}; + +} + +namespace program +{ + +struct Context +{ + struct Variable + { + std::optional identifier; + std::optional declaration; + // TODO: type + // TODO: pointer according to ethdebug/format/spec/pointer + }; + + std::optional code; + std::optional> variables; + std::optional remark; +}; + +struct Instruction +{ + struct Operation + { + std::string mnemonic; + std::vector arguments; + }; + + data::Unsigned offset; + std::optional operation; + std::optional context; +}; + +} + +struct Program +{ + enum class Environment + { + CALL, CREATE + }; + + struct Contract + { + std::optional name; + materials::SourceRange definition; + }; + + std::optional compilation; + Contract contract; + Environment environment; + std::optional context; + std::vector instructions; +}; + +namespace data +{ +void to_json(Json& _json, HexValue const& _hexValue); +void to_json(Json& _json, Unsigned const& _unsigned); +} + +namespace materials +{ +void to_json(Json& _json, ID const& _id); +void to_json(Json& _json, Reference const& _source); +void to_json(Json& _json, SourceRange::Range const& _range); +void to_json(Json& _json, SourceRange const& _sourceRange); +} + +namespace program +{ +void to_json(Json& _json, Context::Variable const& _contextVariable); +void to_json(Json& _json, Context const& _context); +void to_json(Json& _json, Instruction::Operation const& _operation); +void to_json(Json& _json, Instruction const& _instruction); +} + +void to_json(Json& _json, Program::Contract const& _contract); +void to_json(Json& _json, Program::Environment const& _environment); +void to_json(Json& _json, Program const& _program); + +} From c0156f07c1410404fb23795a7939f32e17d02153 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 15:22:54 +0200 Subject: [PATCH 06/12] Ethdebug uses schema for serialization --- libevmasm/Ethdebug.cpp | 131 ++++++++++++++++---------- libevmasm/Ethdebug.h | 2 +- test/libsolidity/StandardCompiler.cpp | 9 +- 3 files changed, 87 insertions(+), 55 deletions(-) diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 0a4c56ddd0a0..8b87aadd072a 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -18,6 +18,8 @@ #include +#include + #include using namespace solidity; @@ -27,14 +29,66 @@ using namespace solidity::evmasm::ethdebug; namespace { -std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId, size_t const _codeSectionIndex) +schema::program::Instruction::Operation instructionOperation(Assembly const& _assembly, LinkerObject const& _linkerObject, size_t const _start, size_t const _end) +{ + solAssert(_end <= _linkerObject.bytecode.size()); + solAssert(_start < _end); + schema::program::Instruction::Operation operation; + operation.mnemonic = instructionInfo(static_cast(_linkerObject.bytecode[_start]), _assembly.evmVersion()).name; + static size_t constexpr instructionSize = 1; + if (_start + instructionSize < _end) + { + bytes const argumentData( + _linkerObject.bytecode.begin() + static_cast(_start) + instructionSize, + _linkerObject.bytecode.begin() + static_cast(_end) + ); + solAssert(!argumentData.empty()); + operation.arguments = {{schema::data::HexValue{argumentData}}}; + } + return operation; +} + +schema::materials::SourceRange::Range locationRange(langutil::SourceLocation const& _location) +{ + return { + .length = schema::data::Unsigned{_location.end - _location.start}, + .offset = schema::data::Unsigned{_location.start} + }; +} + +schema::materials::Reference sourceReference(unsigned _sourceID) +{ + return { + .id = schema::materials::ID{_sourceID}, + .type = std::nullopt + }; +} + +std::optional instructionContext(Assembly::CodeSection const& _codeSection, size_t _assemblyItemIndex, unsigned _sourceID) +{ + solAssert(_assemblyItemIndex < _codeSection.items.size()); + langutil::SourceLocation const& location = _codeSection.items.at(_assemblyItemIndex).location(); + if (!location.isValid()) + return std::nullopt; + + return schema::program::Context{ + schema::materials::SourceRange{ + .source = sourceReference(_sourceID), + .range = locationRange(location) + }, + std::nullopt, + std::nullopt + }; +} + +std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID, size_t const _codeSectionIndex) { solAssert(_codeSectionIndex < _linkerObject.codeSectionLocations.size()); solAssert(_codeSectionIndex < _assembly.codeSections().size()); auto const& locations = _linkerObject.codeSectionLocations[_codeSectionIndex]; auto const& codeSection = _assembly.codeSections().at(_codeSectionIndex); - std::vector instructions; + std::vector instructions; instructions.reserve(codeSection.items.size()); bool const codeSectionContainsVerbatim = ranges::any_of( @@ -52,72 +106,47 @@ std::vector codeSectionInstructions(Assembly const& _assembly, LinkerObjec if (start == end) continue; - size_t const assemblyItemIndex = currentInstruction.assemblyItemIndex; - solAssert(end <= _linkerObject.bytecode.size()); - solAssert(start < end); - solAssert(assemblyItemIndex < codeSection.items.size()); - Json operation = Json::object(); - operation["mnemonic"] = instructionInfo(static_cast(_linkerObject.bytecode[start]), _assembly.evmVersion()).name; - static size_t constexpr instructionSize = 1; - if (start + instructionSize < end) - { - bytes const argumentData( - _linkerObject.bytecode.begin() + static_cast(start) + instructionSize, - _linkerObject.bytecode.begin() + static_cast(end) - ); - solAssert(!argumentData.empty()); - operation["arguments"] = Json::array({util::toHex(argumentData, util::HexPrefix::Add)}); - } - langutil::SourceLocation const& location = codeSection.items.at(assemblyItemIndex).location(); - instructions.emplace_back(Json{ - { "offset", start }, - {"operation", operation }, - { - "context", { - "code", { - "source", { - { "id", static_cast(_sourceId) }, - }, - "range", { - { "offset", location.start }, - { "length", location.end - location.start } - } - } - } - } + instructions.emplace_back(schema::program::Instruction{ + .offset = schema::data::Unsigned{start}, + .operation = instructionOperation(_assembly, _linkerObject, start, end), + .context = instructionContext(codeSection, currentInstruction.assemblyItemIndex, _sourceID) }); } return instructions; } -Json programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned _sourceId) +std::vector programInstructions(Assembly const& _assembly, LinkerObject const& _linkerObject, unsigned const _sourceID) { auto const numCodeSections = _assembly.codeSections().size(); solAssert(numCodeSections == _linkerObject.codeSectionLocations.size()); - std::vector instructionInfo; + std::vector instructionInfo; for (size_t codeSectionIndex = 0; codeSectionIndex < numCodeSections; ++codeSectionIndex) - instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceId, codeSectionIndex); + instructionInfo += codeSectionInstructions(_assembly, _linkerObject, _sourceID, codeSectionIndex); return instructionInfo; } } // anonymous namespace -Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject) +Json ethdebug::program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject) { - Json result = Json::object(); - result["contract"] = Json::object(); - result["contract"]["name"] = _name; - result["contract"]["definition"] = Json::object(); - result["contract"]["definition"]["source"] = Json::object(); - result["contract"]["definition"]["source"]["id"] = _sourceId; - if (_assembly) - { - result["environment"] = _assembly->isCreation() ? "create" : "call"; - result["instructions"] = programInstructions(*_assembly, _linkerObject, _sourceId); - } - return result; + return schema::Program{ + .compilation = std::nullopt, + .contract = { + .name = std::string{_name}, + .definition = { + .source = { + .id = {_sourceID}, + .type = std::nullopt + }, + .range = std::nullopt + } + }, + .environment = _assembly.isCreation() ? schema::Program::Environment::CREATE : schema::Program::Environment::CALL, + .context = std::nullopt, + .instructions = programInstructions(_assembly, _linkerObject, _sourceID) + }; } Json ethdebug::resources(std::vector const& _sources, std::string const& _version) diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h index fdb693b190c4..2e0df3484ba6 100644 --- a/libevmasm/Ethdebug.h +++ b/libevmasm/Ethdebug.h @@ -27,7 +27,7 @@ namespace solidity::evmasm::ethdebug { // returns ethdebug/format/program. -Json program(std::string_view _name, unsigned _sourceId, Assembly const& _assembly, LinkerObject const& _linkerObject); +Json program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject); // returns ethdebug/format/info/resources Json resources(std::vector const& _sources, std::string const& _version); diff --git a/test/libsolidity/StandardCompiler.cpp b/test/libsolidity/StandardCompiler.cpp index 5360a2778677..539d9195155b 100644 --- a/test/libsolidity/StandardCompiler.cpp +++ b/test/libsolidity/StandardCompiler.cpp @@ -2231,9 +2231,12 @@ BOOST_DATA_TEST_CASE(ethdebug_output_instructions_smoketest, boost::unit_test::d BOOST_REQUIRE(instruction.contains("offset")); BOOST_REQUIRE(instruction.contains("operation")); BOOST_REQUIRE(instruction["operation"].contains("mnemonic")); - BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("length")); - BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("offset")); - BOOST_REQUIRE(instruction["context"]["code"]["source"].contains("id")); + if (instruction.contains("context")) + { + BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("length")); + BOOST_REQUIRE(instruction["context"]["code"]["range"].contains("offset")); + BOOST_REQUIRE(instruction["context"]["code"]["source"].contains("id")); + } std::string mnemonic = instruction["operation"]["mnemonic"]; if (mnemonic.find("PUSH") != std::string::npos) { From 77411c0929f1f4c1f3cab28d60e52a34527d2199 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 9 Apr 2025 15:23:21 +0200 Subject: [PATCH 07/12] Update test expectations with eof-enabled ethdebug --- .../ethdebug_eof_container_osaka/args | 2 +- .../ethdebug_eof_container_osaka/err | 1 - .../ethdebug_eof_container_osaka/exit | 1 - .../ethdebug_eof_container_osaka/output | 251 +++++++++++++++++- 4 files changed, 251 insertions(+), 4 deletions(-) delete mode 100644 test/cmdlineTests/ethdebug_eof_container_osaka/err delete mode 100644 test/cmdlineTests/ethdebug_eof_container_osaka/exit diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/args b/test/cmdlineTests/ethdebug_eof_container_osaka/args index 65974c6287da..16466b842d0e 100644 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/args +++ b/test/cmdlineTests/ethdebug_eof_container_osaka/args @@ -1 +1 @@ - --experimental-eof-version 1 --evm-version osaka --ethdebug --via-ir + --experimental-eof-version 1 --evm-version osaka --ethdebug --via-ir --pretty-json --json-indent 4 diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/err b/test/cmdlineTests/ethdebug_eof_container_osaka/err deleted file mode 100644 index 7714685971d2..000000000000 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/err +++ /dev/null @@ -1 +0,0 @@ -Error: ethdebug does not yet support EOF. diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/exit b/test/cmdlineTests/ethdebug_eof_container_osaka/exit deleted file mode 100644 index d00491fd7e5b..000000000000 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/exit +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/output b/test/cmdlineTests/ethdebug_eof_container_osaka/output index b29297ee3749..e9b57d885711 100644 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/output +++ b/test/cmdlineTests/ethdebug_eof_container_osaka/output @@ -1,4 +1,253 @@ ======= Debug Data (ethdebug/format/info/resources) ======= -{"compilation":{"compiler":{"name":"solc","version": ""},"sources":[{"id":0,"path":"input.sol"}]}} +{ + "compilation": { + "compiler": { + "name": "solc", + "version": "" + }, + "sources": [ + { + "id": 0, + "path": "input.sol" + } + ] + } +} ======= input.sol:C ======= +Debug Data (ethdebug/format/program): +{ + "contract": { + "definition": { + "source": { + "id": 0 + } + }, + "name": "C" + }, + "environment": "create", + "instructions": [ + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 30, + "operation": { + "arguments": [ + "0x80" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 32, + "operation": { + "arguments": [ + "0x40" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 34, + "operation": { + "mnemonic": "MSTORE" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 35, + "operation": { + "mnemonic": "CALLVALUE" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 36, + "operation": { + "arguments": [ + "0x0005" + ], + "mnemonic": "RJUMPI" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 39, + "operation": { + "mnemonic": "PUSH0" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 40, + "operation": { + "arguments": [ + "0x80" + ], + "mnemonic": "PUSH1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 42, + "operation": { + "arguments": [ + "0x00" + ], + "mnemonic": "RETURNCONTRACT" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 44, + "operation": { + "arguments": [ + "0x0001" + ], + "mnemonic": "JUMPF" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 47, + "operation": { + "mnemonic": "PUSH0" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 48, + "operation": { + "mnemonic": "DUP1" + } + }, + { + "context": { + "code": { + "range": { + "length": 41, + "offset": 60 + }, + "source": { + "id": 0 + } + } + }, + "offset": 49, + "operation": { + "mnemonic": "REVERT" + } + } + ] +} From 3328101300b17ceb703910af4bfec55f46f20395 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Fri, 11 Apr 2025 11:37:46 +0200 Subject: [PATCH 08/12] Test ethdebug output when requested in standard json under EOF --- .../args | 1 + .../in.yul | 16 +++++++ .../input.json | 18 ++++++++ .../output.json | 42 +++++++++++++++++++ .../strip-ethdebug | 0 .../standard_yul_ethdebug_eof/args | 1 + .../standard_yul_ethdebug_eof/in.yul | 17 ++++++++ .../standard_yul_ethdebug_eof/input.json | 13 ++++++ .../standard_yul_ethdebug_eof/output.json | 42 +++++++++++++++++++ .../standard_yul_ethdebug_eof/strip-ethdebug | 0 10 files changed, 150 insertions(+) create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json create mode 100644 test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/args create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/in.yul create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/input.json create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/output.json create mode 100644 test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args new file mode 100644 index 000000000000..18532c5a6d3f --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/args @@ -0,0 +1 @@ +--allow-paths . diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul new file mode 100644 index 000000000000..aa564d00ce86 --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/in.yul @@ -0,0 +1,16 @@ +/// @use-src 0:"input.sol" +object "C_6_deployed" { + code { + /// @src 0:60:101 "contract C {..." + mstore(64, 128) + + // f() + fun_f_5() + + /// @src 0:77:99 "function f() public {}" + function fun_f_5() { + sstore(0, 42) + } + /// @src 0:60:101 "contract C {..." + } +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json new file mode 100644 index 000000000000..a8c3436c1a0f --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/input.json @@ -0,0 +1,18 @@ +{ + "language": "Yul", + "sources": { + "C": { + "urls": [ + "in.yul" + ] + } + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "debug": {"debugInfo": ["ethdebug"]}, + "outputSelection": { + "*": {"*": ["ir", "irOptimized", "evm.bytecode.ethdebug"]} + } + } +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json new file mode 100644 index 000000000000..2b2f395fadd4 --- /dev/null +++ b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/output.json @@ -0,0 +1,42 @@ +{ + "contracts": { + "C": { + "C_6_deployed": { + "evm": { + "bytecode": { + "ethdebug": "" + } + }, + "ir": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + /// @src 0:60:101 + mstore(64, 128) + fun_f_5() + /// @src 0:77:99 + function fun_f_5() + { sstore(0, 42) } + } +} +", + "irOptimized": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + { + /// @src 0:60:101 + mstore(64, 128) + fun_f() + } + /// @src 0:77:99 + function fun_f() + { sstore(0, 42) } + } +} +" + } + } + }, + "ethdebug": "" +} diff --git a/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug b/test/cmdlineTests/standard_yul_debug_info_ethdebug_compatible_output_eof/strip-ethdebug new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/args b/test/cmdlineTests/standard_yul_ethdebug_eof/args new file mode 100644 index 000000000000..18532c5a6d3f --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/args @@ -0,0 +1 @@ +--allow-paths . diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul b/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul new file mode 100644 index 000000000000..920aef8e9dc2 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/in.yul @@ -0,0 +1,17 @@ +/// @use-src 0:"input.sol" +object "C_6_deployed" { + code { + /// @src 0:60:101 "contract C {..." + mstore(64, 128) + + // f() + fun_f_5() + + /// @src 0:77:99 "function f() public {}" + function fun_f_5() { + sstore(0, 42) + } + /// @src 0:60:101 "contract C {..." + } +} + diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/input.json b/test/cmdlineTests/standard_yul_ethdebug_eof/input.json new file mode 100644 index 000000000000..4e33c42670a6 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/input.json @@ -0,0 +1,13 @@ +{ + "language": "Yul", + "sources": { + "C": {"urls": ["in.yul"]} + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "outputSelection": { + "*": {"*": ["evm.bytecode.ethdebug", "evm.deployedBytecode.ethdebug", "ir", "irOptimized"]} + } + } +} diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/output.json b/test/cmdlineTests/standard_yul_ethdebug_eof/output.json new file mode 100644 index 000000000000..2b2f395fadd4 --- /dev/null +++ b/test/cmdlineTests/standard_yul_ethdebug_eof/output.json @@ -0,0 +1,42 @@ +{ + "contracts": { + "C": { + "C_6_deployed": { + "evm": { + "bytecode": { + "ethdebug": "" + } + }, + "ir": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + /// @src 0:60:101 + mstore(64, 128) + fun_f_5() + /// @src 0:77:99 + function fun_f_5() + { sstore(0, 42) } + } +} +", + "irOptimized": "/// ethdebug: enabled +/// @use-src 0:\"input.sol\" +object \"C_6_deployed\" { + code { + { + /// @src 0:60:101 + mstore(64, 128) + fun_f() + } + /// @src 0:77:99 + function fun_f() + { sstore(0, 42) } + } +} +" + } + } + }, + "ethdebug": "" +} diff --git a/test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug b/test/cmdlineTests/standard_yul_ethdebug_eof/strip-ethdebug new file mode 100644 index 000000000000..e69de29bb2d1 From 667d2e3f0eb4c0c1227ca7f4e35cd814146a63cb Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Tue, 6 May 2025 13:16:51 +0200 Subject: [PATCH 09/12] Add tests for ethdebug output on abstract contracts and interfaces --- test/cmdlineTests/ethdebug_on_abstract/args | 1 + .../ethdebug_on_abstract/input.sol | 6 +++ test/cmdlineTests/ethdebug_on_abstract/output | 19 +++++++ test/cmdlineTests/ethdebug_on_interface/args | 1 + .../ethdebug_on_interface/input.sol | 6 +++ .../cmdlineTests/ethdebug_on_interface/output | 19 +++++++ .../input.json | 27 ++++++++++ .../output.json | 54 +++++++++++++++++++ 8 files changed, 133 insertions(+) create mode 100644 test/cmdlineTests/ethdebug_on_abstract/args create mode 100644 test/cmdlineTests/ethdebug_on_abstract/input.sol create mode 100644 test/cmdlineTests/ethdebug_on_abstract/output create mode 100644 test/cmdlineTests/ethdebug_on_interface/args create mode 100644 test/cmdlineTests/ethdebug_on_interface/input.sol create mode 100644 test/cmdlineTests/ethdebug_on_interface/output create mode 100644 test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/input.json create mode 100644 test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json diff --git a/test/cmdlineTests/ethdebug_on_abstract/args b/test/cmdlineTests/ethdebug_on_abstract/args new file mode 100644 index 000000000000..c6018b329a67 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_abstract/args @@ -0,0 +1 @@ +--ethdebug --via-ir --pretty-json --json-indent 4 diff --git a/test/cmdlineTests/ethdebug_on_abstract/input.sol b/test/cmdlineTests/ethdebug_on_abstract/input.sol new file mode 100644 index 000000000000..29ff21a20ce8 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_abstract/input.sol @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +pragma solidity >=0.0; + +abstract contract C { + function f() public virtual returns (bytes32); +} diff --git a/test/cmdlineTests/ethdebug_on_abstract/output b/test/cmdlineTests/ethdebug_on_abstract/output new file mode 100644 index 000000000000..744ec0e77642 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_abstract/output @@ -0,0 +1,19 @@ +======= Debug Data (ethdebug/format/info/resources) ======= +{ + "compilation": { + "compiler": { + "name": "solc", + "version": "" + }, + "sources": [ + { + "id": 0, + "path": "input.sol" + } + ] + } +} + +======= input.sol:C ======= +Debug Data (ethdebug/format/program): +null diff --git a/test/cmdlineTests/ethdebug_on_interface/args b/test/cmdlineTests/ethdebug_on_interface/args new file mode 100644 index 000000000000..c6018b329a67 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_interface/args @@ -0,0 +1 @@ +--ethdebug --via-ir --pretty-json --json-indent 4 diff --git a/test/cmdlineTests/ethdebug_on_interface/input.sol b/test/cmdlineTests/ethdebug_on_interface/input.sol new file mode 100644 index 000000000000..cd008c45b465 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_interface/input.sol @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +pragma solidity >=0.0; + +interface C { + function f() external; +} diff --git a/test/cmdlineTests/ethdebug_on_interface/output b/test/cmdlineTests/ethdebug_on_interface/output new file mode 100644 index 000000000000..744ec0e77642 --- /dev/null +++ b/test/cmdlineTests/ethdebug_on_interface/output @@ -0,0 +1,19 @@ +======= Debug Data (ethdebug/format/info/resources) ======= +{ + "compilation": { + "compiler": { + "name": "solc", + "version": "" + }, + "sources": [ + { + "id": 0, + "path": "input.sol" + } + ] + } +} + +======= input.sol:C ======= +Debug Data (ethdebug/format/program): +null diff --git a/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/input.json b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/input.json new file mode 100644 index 000000000000..06b2d86e96f6 --- /dev/null +++ b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/input.json @@ -0,0 +1,27 @@ +{ + "language": "Solidity", + "sources": { + "a.sol": { + "content": "// SPDX-License-Identifier: GPL-2.0\npragma solidity >=0.0;\n\nabstract contract C {\n function f() public virtual returns (bytes32);\n}\n" + }, + "b.sol": { + "content": "// SPDX-License-Identifier: GPL-2.0\npragma solidity >=0.0;\n\ninterface C {\n function f() external;\n}\n" + } + }, + "settings": { + "viaIR": true, + "debug": { + "debugInfo": [ + "ethdebug" + ] + }, + "outputSelection": { + "*": { + "*": [ + "evm.bytecode.ethdebug", + "evm.deployedBytecode.ethdebug" + ] + } + } + } +} diff --git a/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json new file mode 100644 index 000000000000..57b26680146a --- /dev/null +++ b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json @@ -0,0 +1,54 @@ +{ + "contracts": { + "a.sol": { + "C": { + "evm": { + "bytecode": { + "ethdebug": null + }, + "deployedBytecode": { + "ethdebug": null + } + } + } + }, + "b.sol": { + "C": { + "evm": { + "bytecode": { + "ethdebug": null + }, + "deployedBytecode": { + "ethdebug": null + } + } + } + } + }, + "ethdebug": { + "compilation": { + "compiler": { + "name": "solc", + "version": "" + }, + "sources": [ + { + "id": 0, + "path": "a.sol" + }, + { + "id": 1, + "path": "b.sol" + } + ] + } + }, + "sources": { + "a.sol": { + "id": 0 + }, + "b.sol": { + "id": 1 + } + } +} From e0f9da3c9ae49563cf6d6ced745f8ef53ebeca3b Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Wed, 16 Apr 2025 14:18:26 +0200 Subject: [PATCH 10/12] Compare ethdebug output to program schema --- .circleci/config.yml | 18 +++++++ test/ethdebugSchemaTests/conftest.py | 49 ++++++++++++++++++ test/ethdebugSchemaTests/input_file.json | 27 ++++++++++ test/ethdebugSchemaTests/input_file_eof.json | 29 +++++++++++ .../test_ethdebug_schema_conformity.py | 51 +++++++++++++++++++ 5 files changed, 174 insertions(+) create mode 100644 test/ethdebugSchemaTests/conftest.py create mode 100644 test/ethdebugSchemaTests/input_file.json create mode 100644 test/ethdebugSchemaTests/input_file_eof.json create mode 100755 test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py diff --git a/.circleci/config.yml b/.circleci/config.yml index 6c88b9d19f7c..def71c0bf96d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -952,6 +952,8 @@ jobs: chk_pylint: <<: *base_ubuntu2404_small steps: + - install_python3: + packages: pyyaml jsonschema pytest - checkout - run: pylint --version - run: @@ -1535,6 +1537,19 @@ jobs: - reports/externalTests/ - matrix_notify_failure_unless_pr + t_ethdebug_output_validity: + <<: *base_node_small + steps: + - checkout + - attach_workspace: + at: /tmp/workspace + - install_python3: + packages: pyyaml jsonschema pytest + - run: + name: Ethdebug validity tests + command: | + pytest test/ethdebugSchemaTests --solc-binary-path=/tmp/workspace/solc/solc-static-linux -v + c_ext_benchmarks: <<: *base_node_small steps: @@ -1928,6 +1943,9 @@ workflows: #- t_ext: *job_native_compile_ext_chainlink #- t_ext: *job_native_compile_ext_bleeps + - t_ethdebug_output_validity: + <<: *requires_b_ubu_static + - c_ext_benchmarks: <<: *requires_nothing requires: diff --git a/test/ethdebugSchemaTests/conftest.py b/test/ethdebugSchemaTests/conftest.py new file mode 100644 index 000000000000..11b43bb4a16b --- /dev/null +++ b/test/ethdebugSchemaTests/conftest.py @@ -0,0 +1,49 @@ +import shutil +import subprocess +from pathlib import Path + +import pytest +import referencing +import yaml +from referencing.jsonschema import DRAFT202012 + + +def pytest_addoption(parser): + parser.addoption("--solc-binary-path", type=Path, required=True, help="Path to the solidity compiler binary.") + + +@pytest.fixture +def solc_path(request): + solc_path = request.config.getoption("--solc-binary-path") + assert solc_path.is_file() + assert solc_path.exists() + return solc_path + + +@pytest.fixture(scope="module") +def ethdebug_clone_dir(tmpdir_factory): + temporary_dir = Path(tmpdir_factory.mktemp("data")) + yield temporary_dir + shutil.rmtree(temporary_dir) + + +@pytest.fixture(scope="module") +def ethdebug_schema_repository(ethdebug_clone_dir): + process = subprocess.run( + ["git", "clone", "https://github.com/ethdebug/format.git", ethdebug_clone_dir], + encoding='utf8', + capture_output=True, + check=True + ) + assert process.returncode == 0 + + registry = referencing.Registry() + for path in (ethdebug_clone_dir / "schemas").rglob("*.yaml"): + with open(path, "r", encoding="utf8") as f: + schema = yaml.safe_load(f) + if "$id" in schema: + resource = referencing.Resource.from_contents(schema, DRAFT202012) + registry = resource @ registry + else: + raise ValueError(f"Schema did not define an $id: {path}") + return registry diff --git a/test/ethdebugSchemaTests/input_file.json b/test/ethdebugSchemaTests/input_file.json new file mode 100644 index 000000000000..7daf7afd6852 --- /dev/null +++ b/test/ethdebugSchemaTests/input_file.json @@ -0,0 +1,27 @@ +{ + "language": "Solidity", + "sources": { + "a.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function a(uint x) public pure { assert(x > 0); } } contract A2 { function a(uint x) public pure { assert(x > 0); } }" + }, + "b.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function b(uint x) public pure { assert(x > 0); } } contract B2 { function b(uint x) public pure { assert(x > 0); } }" + } + }, + "settings": { + "viaIR": true, + "debug": { + "debugInfo": [ + "ethdebug" + ] + }, + "outputSelection": { + "*": { + "*": [ + "evm.bytecode.ethdebug", + "evm.deployedBytecode.ethdebug" + ] + } + } + } +} diff --git a/test/ethdebugSchemaTests/input_file_eof.json b/test/ethdebugSchemaTests/input_file_eof.json new file mode 100644 index 000000000000..0462065327f8 --- /dev/null +++ b/test/ethdebugSchemaTests/input_file_eof.json @@ -0,0 +1,29 @@ +{ + "language": "Solidity", + "sources": { + "a.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function a(uint x) public pure { assert(x > 0); } } contract A2 { function a(uint x) public pure { assert(x > 0); } }" + }, + "b.sol": { + "content": "//SPDX-License-Identifier: GPL-3.0\npragma solidity >=0.0;\ncontract A1 { function b(uint x) public pure { assert(x > 0); } } contract B2 { function b(uint x) public pure { assert(x > 0); } }" + } + }, + "settings": { + "eofVersion": 1, + "evmVersion": "osaka", + "viaIR": true, + "debug": { + "debugInfo": [ + "ethdebug" + ] + }, + "outputSelection": { + "*": { + "*": [ + "evm.bytecode.ethdebug", + "evm.deployedBytecode.ethdebug" + ] + } + } + } +} diff --git a/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py new file mode 100755 index 000000000000..ae1ab92df4e7 --- /dev/null +++ b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python3 + +import json +import subprocess +from pathlib import Path + +import jsonschema +import pytest + + +def get_nested_value(dictionary, *keys): + for key in keys: + dictionary = dictionary[key] + return dictionary + + +@pytest.fixture(params=["input_file.json", "input_file_eof.json"]) +def solc_output(request, solc_path): + testfile_dir = Path(__file__).parent + with open(testfile_dir / request.param, "r", encoding="utf8") as f: + source = json.load(f) + + process = subprocess.run( + [solc_path, "--standard-json"], + input=json.dumps(source), + encoding='utf8', + capture_output=True, + check=True, + ) + assert process.returncode == 0 + return json.loads(process.stdout) + + +@pytest.mark.parametrize("output_selection", ["evm.bytecode.ethdebug", "evm.deployedBytecode.ethdebug"], ids=str) +def test_program_schema( + output_selection, + ethdebug_schema_repository, + solc_output +): + validator = jsonschema.Draft202012Validator( + schema={"$ref": "schema:ethdebug/format/program"}, + registry=ethdebug_schema_repository + ) + assert "contracts" in solc_output + for contract in solc_output["contracts"].keys(): + contract_output = solc_output["contracts"][contract] + assert len(contract_output) > 0 + for source in contract_output.keys(): + source_output = contract_output[source] + ethdebug_data = get_nested_value(source_output, *(output_selection.split("."))) + validator.validate(ethdebug_data) From aa880e75a53bafbd45b623e48b36235543125c52 Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Mon, 12 May 2025 09:29:06 +0200 Subject: [PATCH 11/12] IPFS hash and swarm hash use string_view as input --- libsolutil/IpfsHash.cpp | 4 ++-- libsolutil/IpfsHash.h | 5 +++-- libsolutil/SwarmHash.h | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/libsolutil/IpfsHash.cpp b/libsolutil/IpfsHash.cpp index 3599b7121830..5ed0c766c1af 100644 --- a/libsolutil/IpfsHash.cpp +++ b/libsolutil/IpfsHash.cpp @@ -152,7 +152,7 @@ bytes groupChunksBottomUp(Chunks _currentLevel) } } -bytes solidity::util::ipfsHash(std::string _data) +bytes solidity::util::ipfsHash(std::string_view _data) { size_t const maxChunkSize = 1024 * 256; size_t chunkCount = _data.length() / maxChunkSize + (_data.length() % maxChunkSize > 0 ? 1 : 0); @@ -196,7 +196,7 @@ bytes solidity::util::ipfsHash(std::string _data) return groupChunksBottomUp(std::move(allChunks)); } -std::string solidity::util::ipfsHashBase58(std::string _data) +std::string solidity::util::ipfsHashBase58(std::string_view _data) { return base58Encode(ipfsHash(std::move(_data))); } diff --git a/libsolutil/IpfsHash.h b/libsolutil/IpfsHash.h index 977fb9836ba7..4e504ac89718 100644 --- a/libsolutil/IpfsHash.h +++ b/libsolutil/IpfsHash.h @@ -21,6 +21,7 @@ #include #include +#include namespace solidity::util { @@ -30,9 +31,9 @@ namespace solidity::util /// As hash function it will use sha2-256. /// The effect is that the hash should be identical to the one produced by /// the command `ipfs add `. -bytes ipfsHash(std::string _data); +bytes ipfsHash(std::string_view _data); /// Compute the "ipfs hash" as above, but encoded in base58 as used by ipfs / bitcoin. -std::string ipfsHashBase58(std::string _data); +std::string ipfsHashBase58(std::string_view _data); } diff --git a/libsolutil/SwarmHash.h b/libsolutil/SwarmHash.h index ed7a52292d39..11fa6ff0785b 100644 --- a/libsolutil/SwarmHash.h +++ b/libsolutil/SwarmHash.h @@ -33,7 +33,7 @@ h256 bzzr0Hash(std::string const& _input); /// Compute the "bzz hash" of @a _input (the NEW binary / BMT version) h256 bzzr1Hash(bytes const& _input); -inline h256 bzzr1Hash(std::string const& _input) +inline h256 bzzr1Hash(std::string_view const _input) { return bzzr1Hash(asBytes(_input)); } From 0313868e45516a531e8e7fb69c45cede35a4cc0b Mon Sep 17 00:00:00 2001 From: clonker <1685266+clonker@users.noreply.github.com> Date: Mon, 12 May 2025 18:42:28 +0200 Subject: [PATCH 12/12] Add compilation id, source contents, and language to ethdebug resource output --- libevmasm/Ethdebug.cpp | 19 -------- libevmasm/Ethdebug.h | 3 -- libevmasm/EthdebugSchema.cpp | 24 ++++++++++ libevmasm/EthdebugSchema.h | 36 +++++++++++++++ libsolidity/interface/CompilerStack.cpp | 44 ++++++++++++++++++- libsolidity/interface/StandardCompiler.cpp | 30 +++++++++++-- libsolidity/interface/StandardCompiler.h | 2 +- solc/CommandLineInterface.cpp | 22 +++++++++- .../ethdebug_eof_container_osaka/output | 3 ++ test/cmdlineTests/ethdebug_on_abstract/output | 3 ++ .../cmdlineTests/ethdebug_on_interface/output | 3 ++ .../output.json | 17 +++++++ .../output.json | 9 ++++ .../test_ethdebug_schema_conformity.py | 11 +++++ test/libevmasm/Assembler.cpp | 12 ----- 15 files changed, 195 insertions(+), 43 deletions(-) diff --git a/libevmasm/Ethdebug.cpp b/libevmasm/Ethdebug.cpp index 8b87aadd072a..d498f8204358 100644 --- a/libevmasm/Ethdebug.cpp +++ b/libevmasm/Ethdebug.cpp @@ -148,22 +148,3 @@ Json ethdebug::program(std::string_view _name, unsigned _sourceID, Assembly cons .instructions = programInstructions(_assembly, _linkerObject, _sourceID) }; } - -Json ethdebug::resources(std::vector const& _sources, std::string const& _version) -{ - Json sources = Json::array(); - for (size_t id = 0; id < _sources.size(); ++id) - { - Json source = Json::object(); - source["id"] = id; - source["path"] = _sources[id]; - sources.push_back(source); - } - Json result = Json::object(); - result["compilation"] = Json::object(); - result["compilation"]["compiler"] = Json::object(); - result["compilation"]["compiler"]["name"] = "solc"; - result["compilation"]["compiler"]["version"] = _version; - result["compilation"]["sources"] = sources; - return result; -} diff --git a/libevmasm/Ethdebug.h b/libevmasm/Ethdebug.h index 2e0df3484ba6..5e92143b4874 100644 --- a/libevmasm/Ethdebug.h +++ b/libevmasm/Ethdebug.h @@ -29,7 +29,4 @@ namespace solidity::evmasm::ethdebug // returns ethdebug/format/program. Json program(std::string_view _name, unsigned _sourceID, Assembly const& _assembly, LinkerObject const& _linkerObject); -// returns ethdebug/format/info/resources -Json resources(std::vector const& _sources, std::string const& _version); - } // namespace solidity::evmasm::ethdebug diff --git a/libevmasm/EthdebugSchema.cpp b/libevmasm/EthdebugSchema.cpp index c54b7167f53e..115b00c04b22 100644 --- a/libevmasm/EthdebugSchema.cpp +++ b/libevmasm/EthdebugSchema.cpp @@ -58,6 +58,23 @@ void schema::materials::to_json(Json& _json, SourceRange::Range const& _range) _json["offset"] = _range.offset; } +void schema::materials::to_json(Json& _json, Source const& _source) +{ + _json["id"] = _source.id; + _json["path"] = _source.path; + _json["contents"] = _source.contents; + if (_source.encoding) + _json["encoding"] = *_source.encoding; + _json["language"] = _source.language; +} + +void schema::materials::to_json(Json& _json, Compilation const& _compilation) +{ + _json["id"] = _compilation.id; + _json["compiler"]["name"] = _compilation.compiler.name; + _json["compiler"]["version"] = _compilation.compiler.version; + _json["sources"] = _compilation.sources; +} void schema::materials::to_json(Json& _json, SourceRange const& _sourceRange) { @@ -73,6 +90,13 @@ void schema::to_json(Json& _json, Program::Contract const& _contract) _json["definition"] = _contract.definition; } +void schema::info::to_json(Json& _json, Resources const& _resources) +{ + if (_resources.compilation) + _json["compilation"] = *_resources.compilation; +} + + void schema::program::to_json(Json& _json, Context::Variable const& _contextVariable) { auto const numProperties = diff --git a/libevmasm/EthdebugSchema.h b/libevmasm/EthdebugSchema.h index 5fc8f78fd62e..d994afa8f8c4 100644 --- a/libevmasm/EthdebugSchema.h +++ b/libevmasm/EthdebugSchema.h @@ -88,6 +88,35 @@ struct SourceRange std::optional range; }; +struct Source +{ + ID id; + std::string path; + std::string contents; + std::optional encoding; + std::string language; +}; + +struct Compilation +{ + struct Compiler + { + std::string name; + std::string version; + }; + ID id; + Compiler compiler; + std::vector sources; +}; + +} + +namespace info +{ +struct Resources +{ + std::optional compilation; +}; } namespace program @@ -155,6 +184,13 @@ void to_json(Json& _json, ID const& _id); void to_json(Json& _json, Reference const& _source); void to_json(Json& _json, SourceRange::Range const& _range); void to_json(Json& _json, SourceRange const& _sourceRange); +void to_json(Json& _json, Source const& _source); +void to_json(Json& _json, Compilation const& _compilation); +} + +namespace info +{ +void to_json(Json& _json, Resources const& _resources); } namespace program diff --git a/libsolidity/interface/CompilerStack.cpp b/libsolidity/interface/CompilerStack.cpp index 975ed3c1982d..4bbc6cf20afc 100644 --- a/libsolidity/interface/CompilerStack.cpp +++ b/libsolidity/interface/CompilerStack.cpp @@ -85,6 +85,7 @@ #include #include +#include #include @@ -1196,7 +1197,38 @@ Json CompilerStack::ethdebug() const { solAssert(m_stackState >= AnalysisSuccessful, "Analysis was not successful."); solAssert(!m_contracts.empty()); - return evmasm::ethdebug::resources(sourceNames(), VersionString); + + std::vector const paths = sourceNames(); + + std::vector sources; + sources.reserve(paths.size()); + + for (auto const& sourcePath: paths) + sources.push_back( + evmasm::ethdebug::schema::materials::Source{ + .id = {sourceIndices()[sourcePath]}, + .path = sourcePath, + .contents = source(sourcePath).charStream->source(), + .encoding = std::nullopt, + .language = "Solidity" + } + ); + + std::stringstream concatenatedMetadata; + for (auto const& contract: m_contracts | ranges::views::values) + concatenatedMetadata << metadata(contract); + evmasm::ethdebug::schema::info::Resources resources { + .compilation = evmasm::ethdebug::schema::materials::Compilation { + .id = {util::toHex(util::ipfsHash(concatenatedMetadata.str()), util::HexPrefix::Add)}, + .compiler = evmasm::ethdebug::schema::materials::Compilation::Compiler { + .name = "solc", + .version = VersionString + }, + .sources = sources + } + }; + + return resources; } Json CompilerStack::ethdebug(std::string const& _contractName) const @@ -1931,7 +1963,15 @@ bytes CompilerStack::createCBORMetadata(Contract const& _contract, bool _forIR) _contract.contract->sourceUnit().annotation().experimentalFeatures ); - std::string meta = (_forIR == m_viaIR ? metadata(_contract) : createMetadata(_contract, _forIR)); + std::string otherMeta; + std::string_view meta; + if (_forIR == m_viaIR) + meta = metadata(_contract); + else + { + otherMeta = createMetadata(_contract, _forIR); + meta = otherMeta; + } MetadataCBOREncoder encoder; diff --git a/libsolidity/interface/StandardCompiler.cpp b/libsolidity/interface/StandardCompiler.cpp index d5502228bda4..9615cc6b217e 100644 --- a/libsolidity/interface/StandardCompiler.cpp +++ b/libsolidity/interface/StandardCompiler.cpp @@ -37,13 +37,15 @@ #include +#include +#include #include #include -#include #include #include +#include #include using namespace solidity; @@ -1637,7 +1639,7 @@ Json StandardCompiler::compileSolidity(StandardCompiler::InputsAndSettings _inpu } -Json StandardCompiler::compileYul(InputsAndSettings _inputsAndSettings) +Json StandardCompiler::compileYul(InputsAndSettings _inputsAndSettings, Json const& _rawInput) { solAssert(_inputsAndSettings.jsonSources.empty()); @@ -1787,7 +1789,27 @@ Json StandardCompiler::compileYul(InputsAndSettings _inputsAndSettings) output["contracts"][sourceName][contractName]["yulCFGJson"] = stack.cfgJson(); if (isEthdebugRequested(_inputsAndSettings.outputSelection)) - output["ethdebug"] = evmasm::ethdebug::resources({sourceName}, VersionString); + { + std::string const id = util::toHex(ipfsHash(jsonCompactPrint(jsonCompactPrint(_rawInput) + jsonCompactPrint(_inputsAndSettings.sources))), HexPrefix::Add); + output["ethdebug"] = evmasm::ethdebug::schema::info::Resources { + .compilation = evmasm::ethdebug::schema::materials::Compilation { + .id = {id}, + .compiler = evmasm::ethdebug::schema::materials::Compilation::Compiler{ + .name = "solc", + .version = VersionString + }, + .sources = { + evmasm::ethdebug::schema::materials::Source { + .id = evmasm::ethdebug::schema::materials::ID{std::uint64_t{0}}, + .path = sourceName, + .contents = sourceContents, + .encoding = std::nullopt, + .language = "Yul" + } + } + } + }; + } return output; } @@ -1805,7 +1827,7 @@ Json StandardCompiler::compile(Json const& _input) noexcept if (settings.language == "Solidity") return compileSolidity(std::move(settings)); else if (settings.language == "Yul") - return compileYul(std::move(settings)); + return compileYul(std::move(settings), _input); else if (settings.language == "SolidityAST") return compileSolidity(std::move(settings)); else if (settings.language == "EVMAssembly") diff --git a/libsolidity/interface/StandardCompiler.h b/libsolidity/interface/StandardCompiler.h index 665e8b487158..4ab1367b7c14 100644 --- a/libsolidity/interface/StandardCompiler.h +++ b/libsolidity/interface/StandardCompiler.h @@ -98,7 +98,7 @@ class StandardCompiler std::map parseAstFromInput(StringMap const& _sources); Json importEVMAssembly(InputsAndSettings _inputsAndSettings); Json compileSolidity(InputsAndSettings _inputsAndSettings); - Json compileYul(InputsAndSettings _inputsAndSettings); + Json compileYul(InputsAndSettings _inputsAndSettings, Json const& _rawInput); ReadCallback::Callback m_readFile; diff --git a/solc/CommandLineInterface.cpp b/solc/CommandLineInterface.cpp index 4a7f34561fbe..56975c88459a 100644 --- a/solc/CommandLineInterface.cpp +++ b/solc/CommandLineInterface.cpp @@ -44,6 +44,7 @@ #include #include +#include #include #include @@ -1359,8 +1360,25 @@ void CommandLineInterface::assembleYul(yul::YulStack::Language _language, yul::Y { sout() << "======= Debug Data (ethdebug/format/info/resources) =======" << std::endl; sout() << util::jsonPrint( - evmasm::ethdebug::resources({{sourceUnitName}}, VersionString), - m_options.formatting.json + evmasm::ethdebug::schema::info::Resources{ + .compilation = evmasm::ethdebug::schema::materials::Compilation { + .id = {""}, + .compiler = evmasm::ethdebug::schema::materials::Compilation::Compiler { + .name = "solc", + .version = VersionString + }, + .sources = { + evmasm::ethdebug::schema::materials::Source { + .id = evmasm::ethdebug::schema::materials::ID{std::uint64_t{0}}, + .path = sourceUnitName, + .contents = yulSource, + .encoding = std::nullopt, + .language = "Yul" + } + } + } + }, + m_options.formatting.json ) << std::endl; } diff --git a/test/cmdlineTests/ethdebug_eof_container_osaka/output b/test/cmdlineTests/ethdebug_eof_container_osaka/output index e9b57d885711..1475614526a2 100644 --- a/test/cmdlineTests/ethdebug_eof_container_osaka/output +++ b/test/cmdlineTests/ethdebug_eof_container_osaka/output @@ -5,9 +5,12 @@ "name": "solc", "version": "" }, + "id": "0x1220a5451e57fefae301cb773787fc38d855d065067f1d80a2cf38ea4e8117f914f8", "sources": [ { + "contents": "// SPDX-License-Identifier: GPL-2.0\npragma solidity >=0.0;\n\ncontract C {\n function f() public {}\n}\n", "id": 0, + "language": "Solidity", "path": "input.sol" } ] diff --git a/test/cmdlineTests/ethdebug_on_abstract/output b/test/cmdlineTests/ethdebug_on_abstract/output index 744ec0e77642..d6d5ca661f09 100644 --- a/test/cmdlineTests/ethdebug_on_abstract/output +++ b/test/cmdlineTests/ethdebug_on_abstract/output @@ -5,9 +5,12 @@ "name": "solc", "version": "" }, + "id": "0x1220f18bade7c9fe1910e3cfe817d60dd1d42d85760d009fe087c91aac1aced8ed66", "sources": [ { + "contents": "// SPDX-License-Identifier: GPL-2.0\npragma solidity >=0.0;\n\nabstract contract C {\n function f() public virtual returns (bytes32);\n}\n", "id": 0, + "language": "Solidity", "path": "input.sol" } ] diff --git a/test/cmdlineTests/ethdebug_on_interface/output b/test/cmdlineTests/ethdebug_on_interface/output index 744ec0e77642..b9c5239660c8 100644 --- a/test/cmdlineTests/ethdebug_on_interface/output +++ b/test/cmdlineTests/ethdebug_on_interface/output @@ -5,9 +5,12 @@ "name": "solc", "version": "" }, + "id": "0x1220e2276c1de81ce47e074f4113acf7edc8160f0ac86ce866411800df43be570a94", "sources": [ { + "contents": "// SPDX-License-Identifier: GPL-2.0\npragma solidity >=0.0;\n\ninterface C {\n function f() external;\n}\n", "id": 0, + "language": "Solidity", "path": "input.sol" } ] diff --git a/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json index 57b26680146a..f9606a817bda 100644 --- a/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json +++ b/test/cmdlineTests/standard_output_debuginfo_ethdebug_with_interfaces_and_abstracts/output.json @@ -31,13 +31,30 @@ "name": "solc", "version": "" }, + "id": "0x1220c96176b836fdd259516a205e7c0ca275f5fe488c02adfe5a324380bdaef43da6", "sources": [ { + "contents": "// SPDX-License-Identifier: GPL-2.0 +pragma solidity >=0.0; + +abstract contract C { + function f() public virtual returns (bytes32); +} +", "id": 0, + "language": "Solidity", "path": "a.sol" }, { + "contents": "// SPDX-License-Identifier: GPL-2.0 +pragma solidity >=0.0; + +interface C { + function f() external; +} +", "id": 1, + "language": "Solidity", "path": "b.sol" } ] diff --git a/test/cmdlineTests/standard_yul_ethdebug_assign_immutable/output.json b/test/cmdlineTests/standard_yul_ethdebug_assign_immutable/output.json index 3af82e21d392..76f784df332b 100644 --- a/test/cmdlineTests/standard_yul_ethdebug_assign_immutable/output.json +++ b/test/cmdlineTests/standard_yul_ethdebug_assign_immutable/output.json @@ -116,9 +116,18 @@ "name": "solc", "version": "" }, + "id": "0x1220567311cdfa80a7fa7e1849f45ddee044787d6bf880874638097ce6c9cc8a22b4", "sources": [ { + "contents": "object \"a\" { + code { + { + setimmutable(0, \"long___name___that___definitely___exceeds___the___thirty___two___byte___limit\", 0x1234567890123456789012345678901234567890) + } + } +}", "id": 0, + "language": "Yul", "path": "C" } ] diff --git a/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py index ae1ab92df4e7..4266452e2cc2 100755 --- a/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py +++ b/test/ethdebugSchemaTests/test_ethdebug_schema_conformity.py @@ -49,3 +49,14 @@ def test_program_schema( source_output = contract_output[source] ethdebug_data = get_nested_value(source_output, *(output_selection.split("."))) validator.validate(ethdebug_data) + +def test_resource_schema( + ethdebug_schema_repository, + solc_output +): + validator = jsonschema.Draft202012Validator( + schema={"$ref": "schema:ethdebug/format/info/resources"}, + registry=ethdebug_schema_repository + ) + assert "ethdebug" in solc_output + validator.validate(solc_output["ethdebug"]) diff --git a/test/libevmasm/Assembler.cpp b/test/libevmasm/Assembler.cpp index 32855cfbc211..8fc8e04f8ae4 100644 --- a/test/libevmasm/Assembler.cpp +++ b/test/libevmasm/Assembler.cpp @@ -454,18 +454,6 @@ BOOST_AUTO_TEST_CASE(ethdebug_program_last_instruction_with_immediate_arguments) } } -BOOST_AUTO_TEST_CASE(ethdebug_resources) -{ - Json const resources = ethdebug::resources({"sourceA", "sourceB"}, "version1"); - BOOST_REQUIRE(resources["compilation"]["compiler"]["name"] == "solc"); - BOOST_REQUIRE(resources["compilation"]["compiler"]["version"] == "version1"); - BOOST_REQUIRE(resources["compilation"]["sources"].size() == 2); - BOOST_REQUIRE(resources["compilation"]["sources"][0]["id"] == 0); - BOOST_REQUIRE(resources["compilation"]["sources"][0]["path"] == "sourceA"); - BOOST_REQUIRE(resources["compilation"]["sources"][1]["id"] == 1); - BOOST_REQUIRE(resources["compilation"]["sources"][1]["path"] == "sourceB"); -} - BOOST_AUTO_TEST_SUITE_END() } // end namespaces