From 3ac47c459e939385c1cd92f2ed0b63b9fba43b25 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Fri, 15 Nov 2019 21:08:44 -0500 Subject: [PATCH 1/8] Make assembler recognize labels. --- pyevmasm/evmasm.py | 60 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 3ac396a..c49454f 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -329,7 +329,17 @@ def is_arithmetic(self): 'ADD', 'MUL', 'SUB', 'DIV', 'SDIV', 'MOD', 'SMOD', 'ADDMOD', 'MULMOD', 'EXP', 'SIGNEXTEND', 'SHL', 'SHR', 'SAR'} -def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): +def is_push(instr): + return (instr._opcode >= 0x60) and (instr._opcode <= 0x6F) + +def is_digit(operand): + try: + int(operand, 0) + return True + except: + return False + +def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK, fillins={}): """ Assemble one EVM instruction from its textual representation. :param asmcode: assembly code for one instruction @@ -355,13 +365,25 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK): instr.pc = pc if instr.operand_size > 0: assert len(asmcode) == 2 - instr.operand = int(asmcode[1], 0) + operand = asmcode[1] + if is_push(instr) and not is_digit(operand): + # instantiating a label, fill it with zeros instead + instr.operand = 0 + if operand in fillins: + fillins[operand].append(pc) + else: + fillins[operand] = [pc] + else: + instr.operand = int(asmcode[1], 0) return instr except: raise AssembleError("Something wrong at pc %d" % pc) +def fixup_instr(instr, label_offset): + assert is_push(instr) + instr.operand = label_offset -def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK): """ Assemble a sequence of textual representation of EVM instructions :param asmcode: assembly code for any number of instructions @@ -390,13 +412,39 @@ def assemble_all(asmcode, pc=0, fork=DEFAULT_FORK): """ asmcode = asmcode.split('\n') asmcode = iter(asmcode) + + # we use a dictionary to record label locations: + labels = {} + # another dictionary to record which instruction + # we need to fill in. + fillins = {} + # we have to traverse the generated instruction twice + # so no use of generator here + instrs = [] + for line in asmcode: if not line.strip(): continue - instr = assemble_one(line, pc=pc, fork=fork) - yield instr + if line.endswith(":"): + # this is a label, record it with location (PC) + labels[line[:-1]] = pc + continue + instr = assemble_one(line, pc=pc, fork=fork, fillins=fillins) + instrs.append(instr) pc += instr.size + # fixup instructions + for label in labels: + if label not in fillins.keys(): + continue + for instr in instrs: + if instr._pc in fillins[label]: + label_pc = labels[label] + fixup_instr(instr, label_pc) + + # to keep it compatible with existing APIs + for instr in instrs: + yield instr def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): """ Disassemble a single instruction from a bytecode @@ -513,7 +561,7 @@ def disassemble(bytecode, pc=0, fork=DEFAULT_FORK): return '\n'.join(map(str, disassemble_all(bytecode, pc=pc, fork=fork))) -def assemble(asmcode, pc=0, fork=DEFAULT_FORK): +def assemble(asmcode, pc=1, fork=DEFAULT_FORK): """ Assemble an EVM program :param asmcode: an evm assembler program From dd51af354ad9327752b0cd9294b3b5c62d8e1697 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Fri, 15 Nov 2019 21:34:51 -0500 Subject: [PATCH 2/8] Update test case. --- tests/test_EVMAssembler.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) mode change 100644 => 100755 tests/test_EVMAssembler.py diff --git a/tests/test_EVMAssembler.py b/tests/test_EVMAssembler.py old mode 100644 new mode 100755 index e04c1e1..d24d757 --- a/tests/test_EVMAssembler.py +++ b/tests/test_EVMAssembler.py @@ -43,6 +43,30 @@ def test_ADD_1(self): asmcode = EVMAsm.disassemble_hex('0x608040526002610100') self.assertEqual(asmcode, '''PUSH1 0x80\nBLOCKHASH\nMSTORE\nPUSH1 0x2\nPUSH2 0x100''') + def test_label(self): + bytecode = EVMAsm.assemble_hex(""" +Start: + PUSH1 Return + PUSH1 0x11 + PUSH1 0x22 + PUSH2 Function + JUMP +Return: + JUMPDEST + PUSH1 0x00 + MSTORE + PUSH1 0x20 + PUSH1 0x00 + RETURN +Function: + JUMPDEST + ADD + SWAP1 + JUMP + """) + self.assertEqual(bytecode, + '0x600a60116022610013565b60005260206000f35b019056') + def test_STOP(self): insn = EVMAsm.disassemble_one(b'\x00') self.assertTrue(insn.mnemonic == 'STOP') From 56cc71e20e974cb16a3d5bfa8900e87c1c59ffd9 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Fri, 15 Nov 2019 21:40:01 -0500 Subject: [PATCH 3/8] The EVM memory space starts at 1 rather than 0. --- pyevmasm/evmasm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index c49454f..b30cd3e 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -491,7 +491,7 @@ def disassemble_one(bytecode, pc=0, fork=DEFAULT_FORK): return instruction -def disassemble_all(bytecode, pc=0, fork=DEFAULT_FORK): +def disassemble_all(bytecode, pc=1, fork=DEFAULT_FORK): """ Disassemble all instructions in bytecode :param bytecode: an evm bytecode (binary) From 1908a60dc793966cb25b0d0676e0a9f0e435aaaf Mon Sep 17 00:00:00 2001 From: Alan Li Date: Sun, 17 Nov 2019 16:57:20 -0500 Subject: [PATCH 4/8] Small improvements: support comments "#". --- pyevmasm/evmasm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index b30cd3e..691a405 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -1,7 +1,7 @@ from bisect import bisect from binascii import hexlify, unhexlify from builtins import map, next, range, object -from future.builtins import next, bytes +from builtins import next, bytes import copy DEFAULT_FORK = "petersburg" @@ -423,7 +423,13 @@ def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK): instrs = [] for line in asmcode: - if not line.strip(): + # remove comments + index = line.find("#") + if index is not -1: + line = line[:index] + # remove excessive trailing spaces + line = line.strip() + if not line: continue if line.endswith(":"): # this is a label, record it with location (PC) From 0b1def20360511d985a73e4cfd0200e3dcc4e833 Mon Sep 17 00:00:00 2001 From: Alan Li Date: Sun, 17 Nov 2019 22:10:09 -0500 Subject: [PATCH 5/8] Remove directives as well. --- pyevmasm/evmasm.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 691a405..6718a5b 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -423,18 +423,27 @@ def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK): instrs = [] for line in asmcode: + line = line.strip() + + # skip empty lines + if not line: + continue + # remove comments index = line.find("#") if index is not -1: line = line[:index] - # remove excessive trailing spaces - line = line.strip() - if not line: + + # skip directives: + if line.find(".") is 0: continue + + # handle labels if line.endswith(":"): # this is a label, record it with location (PC) labels[line[:-1]] = pc continue + instr = assemble_one(line, pc=pc, fork=fork, fillins=fillins) instrs.append(instr) pc += instr.size From e1057e14d1b67767547beaeb979064ef2fa5ef3e Mon Sep 17 00:00:00 2001 From: Alan Li Date: Mon, 18 Nov 2019 18:50:03 -0500 Subject: [PATCH 6/8] Fix labeling issue. --- pyevmasm/evmasm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 6718a5b..3d353ad 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -365,7 +365,7 @@ def assemble_one(asmcode, pc=0, fork=DEFAULT_FORK, fillins={}): instr.pc = pc if instr.operand_size > 0: assert len(asmcode) == 2 - operand = asmcode[1] + operand = asmcode[1].strip() if is_push(instr) and not is_digit(operand): # instantiating a label, fill it with zeros instead instr.operand = 0 From 31503e4458e0032c852c31a2f906d4039e8a279c Mon Sep 17 00:00:00 2001 From: Alan Li Date: Sat, 23 Nov 2019 21:57:14 -0500 Subject: [PATCH 7/8] Self operand can take negative value. --- pyevmasm/evmasm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index 3d353ad..ca20faf 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -181,8 +181,7 @@ def operand(self): @operand.setter def operand(self, value): if self.operand_size != 0 and value is not None: - mask = (1 << self.operand_size * 8) - 1 - if ~mask & value: + if value.bit_length() > self.operand_size * 8: raise ValueError("operand should be %d bits long" % (self.operand_size * 8)) self._operand = value From f0f2d55bffd5920588ba3b6254e98c0c075b3dee Mon Sep 17 00:00:00 2001 From: Alan Li Date: Tue, 17 Dec 2019 18:06:28 -0500 Subject: [PATCH 8/8] Adding fix up for `deploy.size`. --- pyevmasm/evmasm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyevmasm/evmasm.py b/pyevmasm/evmasm.py index ca20faf..e6b3f14 100644 --- a/pyevmasm/evmasm.py +++ b/pyevmasm/evmasm.py @@ -447,6 +447,9 @@ def assemble_all(asmcode, pc=1, fork=DEFAULT_FORK): instrs.append(instr) pc += instr.size + # size of the contract is the current PC + labels["deploy.size"] = pc - 1 + # fixup instructions for label in labels: if label not in fillins.keys():