Skip to content

Commit

Permalink
Discover sub-word encodings (#41)
Browse files Browse the repository at this point in the history
This commit adds a lifting pass that, where possible, discovers accesses
to portions of a word smaller than the entire word. This allows the
analyser to discover the size of types in many cases, but is also the
precursor work to supporting packed encodings into single storage slots.

In addition the codebase is now explicit about the byte ordering it is
constructed with, and maintains the "stored as little-endian" invariant
explicitly.

It also adds a test for packed encodings that is not yet properly
supported by the analyser, but is necessary for phase one of the
packed-encoding work.
  • Loading branch information
iamrecursion committed Jul 27, 2023
1 parent 5e9be2d commit b8584bf
Show file tree
Hide file tree
Showing 41 changed files with 3,726 additions and 1,048 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,12 @@ publish = false
# Build dependencies.
[dependencies]
bimap = "0.6.3"
bitvec = "1.0.1"
derivative = "2.2.0"
downcast-rs = "1.2.0"
ethnum = "1.3.2"
hex = "0.4.3"
itertools = "0.10.5"
serde = { version = "1.0.163", features = ["derive"] }
sha3 = "0.10.8"
thiserror = "1.0.40"
Expand All @@ -41,7 +43,6 @@ uuid = { version = "1.3.2", features = ["v4", "fast-rng", "macro-diagnostics"] }
# These are the dependencies required purely for internal development of the library such as testing or benchmarking.
[dev-dependencies]
anyhow = { version = "1.0.71", features = ["backtrace"] }
itertools = "0.10.5"
rand = "0.8.5"
serde_json = "1.0.96"

Expand Down
1,123 changes: 1,123 additions & 0 deletions asset/PackedEncodings.json

Large diffs are not rendered by default.

43 changes: 43 additions & 0 deletions asset/PackedEncodings.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// SPDX-License-Identifier: UNLICENSED
pragma solidity ^0.8.13;

// Test fixture contract containing structs whose fields are each narrower
// than a full 256-bit EVM word. One struct is held only as a plain storage
// variable, the other is held both as a storage variable and as the element
// type of a dynamic array.
contract BytecodeExample {

// This one is only used in a storage slot, so should be indistinguishable
// from a packed encoding (done so we force it)
//
// The two fields total 192 bits (64 + 128), so under Solidity's storage
// layout rules they share a single 32-byte slot.
struct StructOne {
uint64 one;
uint128 two;
}

// This one we use in a dynamic array as well
//
// The two fields total 168 bits (160-bit address + 8-bit bool), so they
// also share a single 32-byte slot.
struct StructTwo {
address addr;
bool isEnabled;
}

// This struct is just used directly.
StructOne internal data;

// Here we create a connection between a dynamic array and a storage slot.
// We should be able to infer a struct type for the slot.
StructTwo internal current;
// `history` is public, so the compiler also generates an index-based getter.
StructTwo[] public history;

// Writes both fields of `data`, one assignment per field.
function set_data(uint64 one, uint128 two) public {
data.one = one;
data.two = two;
}

// Updates the current and adds the old one to the history
//
// The push happens first, so `history` receives the value of `current` from
// before this call; only then are the fields of `current` overwritten.
function new_current(address addr, bool isEnabled) public {
history.push(current);
current.addr = addr;
current.isEnabled = isEnabled;
}

// A getter for the current
//
// Returns a memory copy of the `current` storage struct.
function get_current() public view returns (StructTwo memory) {
return current;
}
}
513 changes: 343 additions & 170 deletions asset/ReplaceMeForTesting.json

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions asset/ReplaceMeForTesting.sol
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
pragma solidity ^0.8.13;

contract BytecodeExample {
mapping(uint256 => mapping(uint256 => uint256)) internal number;
uint256[] internal numbers;
mapping(uint128 => mapping(uint128 => uint256)) internal mappings;
uint64[] internal numbers;

function add(uint256 key, uint256 value) public {
number[key][key] = value;
function add(uint128 key, uint256 value) public {
mappings[key][key] = value;
}

function append(uint256 number) public {
function append(uint64 number) public {
numbers.push(number);
}

function read(uint256 index) public view returns (uint64) {
return numbers[index];
}
}
513 changes: 343 additions & 170 deletions asset/SimpleContract.json

Large diffs are not rendered by default.

14 changes: 9 additions & 5 deletions asset/SimpleContract.sol
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
pragma solidity ^0.8.13;

contract BytecodeExample {
mapping(uint256 => mapping(uint256 => uint256)) internal number;
uint256[] internal numbers;
mapping(uint128 => mapping(uint128 => uint256)) internal mappings;
uint64[] internal numbers;

function add(uint256 key, uint256 value) public {
number[key][key] = value;
function add(uint128 key, uint256 value) public {
mappings[key][key] = value;
}

function append(uint256 number) public {
function append(uint64 number) public {
numbers.push(number);
}

function read(uint256 index) public view returns (uint64) {
return numbers[index];
}
}
2 changes: 1 addition & 1 deletion docs/Extending the Analyzer.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ s_store(storage_slot(dynamic_array<storage_slot(base_slot)>[index]), value)
equating
- `d = dynamic_array<b>`
- `f = word(unsigned, unknown width, unknown usage)`
- `f = word(width = unknown, usage = UnsignedWord)`
- `b = g`
```

Expand Down
25 changes: 24 additions & 1 deletion src/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,16 @@ pub const PUSH_OPCODE_MAX_BYTES: u8 = 32;
pub const MAXIMUM_STACK_DEPTH: usize = 1024;

/// The width of a word on the EVM in bits.
pub const WORD_SIZE: usize = 256;
pub const WORD_SIZE_BITS: usize = 256;

/// The width of a byte on the EVM (and most other places) in bits.
pub const BYTE_SIZE: usize = 8;

/// The width of a word on the EVM in bytes.
pub const WORD_SIZE_BYTES: usize = WORD_SIZE_BITS / BYTE_SIZE;

/// The bit-width of a bool type.
pub const BOOL_WIDTH_BITS: usize = BYTE_SIZE;

/// The bit-width of an address type.
pub const ADDRESS_WIDTH_BITS: usize = 160;
Expand All @@ -54,3 +63,17 @@ pub const FUNCTION_WIDTH_BITS: usize = ADDRESS_WIDTH_BITS + SELECTOR_WIDTH_BITS;

/// The default number of times that the virtual machine will visit each opcode.
pub const DEFAULT_ITERATIONS_PER_OPCODE: usize = 10;

/// Every bit-width that a solidity value type is allowed to have, in
/// ascending order.
///
/// The language specification permits widths `N` with `8 <= N <= 256` and
/// `N % 8 == 0`, so entry `i` of this table holds `(i + 1) * BYTE_SIZE`.
pub const SOLIDITY_VALUE_TYPE_WIDTHS: [usize; WORD_SIZE_BYTES] = {
    // Iterator adaptors are not usable in a `const` initialiser, so the table
    // is filled in by an explicit loop.
    let mut widths = [0usize; WORD_SIZE_BYTES];
    let mut index: usize = 0;
    loop {
        if index >= WORD_SIZE_BYTES {
            break;
        }
        widths[index] = BYTE_SIZE * (index + 1);
        index += 1;
    }
    widths
};
15 changes: 15 additions & 0 deletions src/disassembly/disassembler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ pub fn disassemble(bytes: &[u8]) -> Result<Vec<DynOpcode>> {

let mut opcodes: Vec<DynOpcode> = Vec::with_capacity(bytes.len());
let ops = &mut opcodes;
let mut last_push: u8 = 0;
let mut last_push_start: u32 = 0;
let mut push_size: u8 = 0;
let mut push_size_bytes: u8 = push_size;
Expand Down Expand Up @@ -92,6 +93,7 @@ pub fn disassemble(bytes: &[u8]) -> Result<Vec<DynOpcode>> {
// Now we can zero out our state variables.
push_bytes.clear();
push_size = 0;
last_push = 0;
}
} else {
// Now we can match the next byte and process the opcode.
Expand Down Expand Up @@ -162,6 +164,7 @@ pub fn disassemble(bytes: &[u8]) -> Result<Vec<DynOpcode>> {
0x5b => add_op(ops, control::JumpDest),
0x5f => add_op(ops, mem::Push0),
0x60..=0x7f => {
last_push = *byte;
last_push_start = instruction_pointer;
push_size = byte - PUSH_OPCODE_BASE_VALUE;
push_size_bytes = push_size;
Expand Down Expand Up @@ -202,6 +205,18 @@ pub fn disassemble(bytes: &[u8]) -> Result<Vec<DynOpcode>> {
}
}

// Solc has generated valid code that ends with an incomplete push, so we have
// to handle it by treating the unterminated push and all the subsequent bytes
// as invalid
if !push_bytes.is_empty() && push_bytes.len() != push_size as usize {
add_op(ops, control::Invalid::new(last_push));
push_bytes.iter().for_each(|b| add_op(ops, control::Invalid::new(*b)));
} else if push_size != 0 {
let opcode = mem::PushN::new(push_size, push_bytes.clone())
.map_err(|e| e.locate(last_push_start))?;
add_op(ops, opcode);
}

Ok(opcodes)
}

Expand Down
3 changes: 0 additions & 3 deletions src/disassembly/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,6 @@ impl<'a> TryFrom<&'a [u8]> for InstructionStream {

fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
let instructions = Rc::new(disassembler::disassemble(value)?);
if instructions.len() > u32::MAX as usize {
return Err(Error::BytecodeTooLarge.locate(0));
}
let result = Self { instructions };

// An assertion that will be disabled in production builds, but a good sanity
Expand Down
15 changes: 11 additions & 4 deletions src/inference/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,20 @@ pub enum AbiType {
/// is a concrete container but not of what type(s).
Any,

/// A number of a given `size` in bits, where `8 <= size <= 256 &&
/// size % 8 == 0`.
///
/// This is emitted when the analyser knows that something has been used
/// numerically, but does not know whether it is concretely signed or not.
Number { size: Option<usize> },

/// Unsigned integers of a given `size` in bits, where `8 <= size <= 256 &&
/// size % 8 == 0`.
UInt { size: u16 },
UInt { size: Option<usize> },

/// Signed (two's complement) integers of a given `size` in bits, where `8 <=
/// size <= 256 && size % 8 == 0`.
Int { size: u16 },
Int { size: Option<usize> },

/// Addresses, assumed equivalent to `UInt { size: 160 }` except for
/// interpretation.
Expand All @@ -58,7 +65,7 @@ pub enum AbiType {
},

/// Byte arrays of a fixed `length`, where `0 < length <= 32`.
Bytes { length: u8 },
Bytes { length: Option<usize> },

/// A dynamically-sized array containing elements of a type `tp`.
DynArray {
Expand Down Expand Up @@ -86,7 +93,7 @@ pub enum AbiType {
///
/// While the conflict is not usually useful itself, treating them as types
/// ensures that we still complete unification as well as is possible.
ConflictedType { left: String, right: String, reason: String },
ConflictedType { conflicts: Vec<String>, reasons: Vec<String> },
}

/// The `U256Wrapper` is responsible for serializing the U256 type to JSON
Expand Down
Loading

0 comments on commit b8584bf

Please sign in to comment.