diff --git a/.gitignore b/.gitignore index f93136374..fe608cde3 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,10 @@ __pycache__/ # C extensions *.so +*.pyd + +# Cython generated +ethereum/pow/ethash_cy.c # qkchash binaries qkchash/qkchash diff --git a/README.md b/README.md index 45586d9a1..802a9c9f3 100644 --- a/README.md +++ b/README.md @@ -71,9 +71,12 @@ To install the required modules for the project. Under `pyquarkchain` dir where # you may want to set the following if cryptography complains about header files: (https://github.com/pyca/cryptography/issues/3489) # export CPPFLAGS=-I/usr/local/opt/openssl/include # export LDFLAGS=-L/usr/local/opt/openssl/lib -pip install -e . +pip install -r requirements.txt +python setup.py build_ext --inplace ``` +The second command builds the optional Cython extension (`ethash_cy`) that speeds up ethash `calc_dataset_item` by ~20x. It requires a C compiler. If the build is skipped, the pure-Python fallback is used automatically. + Once all the modules are installed, try running all the unit tests under `pyquarkchain` ``` diff --git a/ethereum/pow/ethash.py b/ethereum/pow/ethash.py index 7e7a0848a..7237cd1fc 100644 --- a/ethereum/pow/ethash.py +++ b/ethereum/pow/ethash.py @@ -1,94 +1,156 @@ -import copy +import os +import numpy as np from functools import lru_cache from typing import Callable, Dict, List -from ethereum.pow.ethash_utils import * +from ethereum.pow.ethash_utils import ( + ethash_sha3_512, ethash_sha3_256, + FNV_PRIME, HASH_BYTES, WORD_BYTES, MIX_BYTES, + DATASET_PARENTS, CACHE_ROUNDS, ACCESSES, EPOCH_LENGTH, +) + +# uint32 overflow is intentional in FNV arithmetic +np.seterr(over="ignore") + +_FNV_PRIME = np.uint32(FNV_PRIME) + +# Optional Cython inner loop for calc_dataset_item. Falls back to pure numpy +# when the compiled extension isn't built (e.g. source checkouts without a +# C compiler). 
+try: + from ethereum.pow.ethash_cy import mix_parents as _cy_mix_parents +except ImportError: # pragma: no cover + _cy_mix_parents = None cache_seeds = [b"\x00" * 32] # type: List[bytes] -def mkcache(cache_size: int, block_number) -> List[List[int]]: + +# --------------------------------------------------------------------------- +# ETHASH_LIB selects the implementation used for non-test PoW verification. +# "ethash" — pure-Python + numpy (always available) +# "ethash_cy" — Cython + C keccak (requires python setup.py build_ext) +# Default: auto-detect best available (ethash_cy → ethash) +# --------------------------------------------------------------------------- +ETHASH_LIB = os.environ.get("ETHASH_LIB", "auto") + +if ETHASH_LIB == "auto": + try: + from ethereum.pow.ethash_cy import cy_hashimoto_light # noqa: F401 + ETHASH_LIB = "ethash_cy" + except ImportError: + ETHASH_LIB = "ethash" + +if ETHASH_LIB == "ethash": + @lru_cache(2) + def _get_cache(seed: bytes, n: int) -> np.ndarray: + """Returns cache as uint32 ndarray of shape (n, 16).""" + o = np.empty((n, 16), dtype=np.uint32) + o[0] = ethash_sha3_512(seed) + for i in range(1, n): + o[i] = ethash_sha3_512(o[i - 1]) + for _ in range(CACHE_ROUNDS): + for i in range(n): + v = int(o[i, 0]) % n + xored = o[(i - 1 + n) % n] ^ o[v] + o[i] = ethash_sha3_512(xored) + return o + + def hashimoto_light( + full_size: int, cache: np.ndarray, header: bytes, nonce: bytes + ) -> Dict: + return hashimoto(header, nonce, full_size, lambda x: calc_dataset_item(cache, x)) + +elif ETHASH_LIB == "ethash_cy": + from ethereum.pow.ethash_cy import cy_hashimoto_light, cy_mkcache + + @lru_cache(2) + def _get_cache(seed: bytes, n: int): + return cy_mkcache(np.frombuffer(seed, dtype=np.uint8), n) + + def hashimoto_light( + full_size: int, cache: np.ndarray, header: bytes, nonce: bytes + ) -> Dict: + return cy_hashimoto_light( + full_size, cache, + np.frombuffer(header, dtype=np.uint8), + np.frombuffer(nonce, dtype=np.uint8), + ) + +else: + 
raise ValueError(f"Unknown ETHASH_LIB={ETHASH_LIB!r}. " + f"Use 'ethash', 'ethash_cy', or 'auto'.") + + +def mkcache(cache_size: int, block_number) -> np.ndarray: while len(cache_seeds) <= block_number // EPOCH_LENGTH: - new_seed = serialize_hash(ethash_sha3_256(cache_seeds[-1])) + new_seed = ethash_sha3_256(cache_seeds[-1]).tobytes() cache_seeds.append(new_seed) seed = cache_seeds[block_number // EPOCH_LENGTH] return _get_cache(seed, cache_size // HASH_BYTES) -@lru_cache(10) -def _get_cache(seed, n) -> List[List[int]]: - # Sequentially produce the initial dataset - o = [ethash_sha3_512(seed)] - for i in range(1, n): - o.append(ethash_sha3_512(o[-1])) - - # Use a low-round version of randmemohash - for _ in range(CACHE_ROUNDS): - for i in range(n): - v = o[i][0] % n - o[i] = ethash_sha3_512(list(map(xor, o[(i - 1 + n) % n], o[v]))) - - return o - - -def calc_dataset_item(cache: List[List[int]], i: int) -> List[int]: +def calc_dataset_item(cache: np.ndarray, i: int) -> np.ndarray: n = len(cache) - r = HASH_BYTES // WORD_BYTES - # initialize the mix - mix = copy.copy(cache[i % n]) # type: List[int] - mix[0] ^= i + mix = cache[i % n].copy() + mix[0] ^= i # numpy auto-converts int, no explicit np.uint32() boxing mix = ethash_sha3_512(mix) - # fnv it with a lot of random cache nodes based on i - for j in range(DATASET_PARENTS): - cache_index = fnv(i ^ j, mix[j % r]) - mix = list(map(fnv, mix, cache[cache_index % n])) + if _cy_mix_parents is not None: + # mix is already C-contiguous uint32[16] (it's a fresh ndarray from + # ethash_sha3_512). cache rows are also contiguous uint32[16]. 
+ _cy_mix_parents(mix, cache, i) + else: + r = HASH_BYTES // WORD_BYTES # 16 + for j in range(DATASET_PARENTS): + cache_index = ((i ^ j) * FNV_PRIME ^ int(mix[j % r])) & 0xFFFFFFFF + mix *= _FNV_PRIME # in-place: no temp array allocation + mix ^= cache[cache_index % n] # in-place: no temp array allocation return ethash_sha3_512(mix) -def calc_dataset(full_size, cache) -> List[List[int]]: - o = [] - for i in range(full_size // HASH_BYTES): - o.append(calc_dataset_item(cache, i)) - return o +def calc_dataset(full_size, cache: np.ndarray) -> np.ndarray: + rows = full_size // HASH_BYTES + out = np.empty((rows, 16), dtype=np.uint32) + for i in range(rows): + out[i] = calc_dataset_item(cache, i) + return out def hashimoto( header: bytes, nonce: bytes, full_size: int, - dataset_lookup: Callable[[int], List[int]], + dataset_lookup: Callable[[int], np.ndarray], ) -> Dict: n = full_size // HASH_BYTES w = MIX_BYTES // WORD_BYTES mixhashes = MIX_BYTES // HASH_BYTES - # combine header+nonce into a 64 byte seed - s = ethash_sha3_512(header + nonce[::-1]) - mix = [] - for _ in range(MIX_BYTES // HASH_BYTES): - mix.extend(s) - # mix in random dataset nodes + + s = ethash_sha3_512(header + nonce[::-1]) # (16,) uint32 + mix = np.tile(s, mixhashes) # (32,) uint32 + s0 = int(s[0]) # hoist constant, avoid repeated unboxing + newdata = np.empty(w, dtype=np.uint32) # pre-allocate, reused every iteration + for i in range(ACCESSES): - p = fnv(i ^ s[0], mix[i % w]) % (n // mixhashes) * mixhashes - newdata = [] - for j in range(mixhashes): - newdata.extend(dataset_lookup(p + j)) - mix = list(map(fnv, mix, newdata)) - # compress mix - cmix = [] - for i in range(0, len(mix), 4): - cmix.append(fnv(fnv(fnv(mix[i], mix[i + 1]), mix[i + 2]), mix[i + 3])) + p = ((i ^ s0) * FNV_PRIME ^ int(mix[i % w])) & 0xFFFFFFFF + p = p % (n // mixhashes) * mixhashes + for j in range(mixhashes): # avoid np.concatenate alloc+copy + newdata[j * 16:(j + 1) * 16] = dataset_lookup(p + j) + mix *= _FNV_PRIME # 
in-place: no temp array + mix ^= newdata # in-place: no temp array + + mix_r = mix.reshape(-1, 4) + cmix = mix_r[:, 0] * _FNV_PRIME ^ mix_r[:, 1] + cmix = cmix * _FNV_PRIME ^ mix_r[:, 2] + cmix = cmix * _FNV_PRIME ^ mix_r[:, 3] + + s_cmix = np.concatenate([s, cmix]) return { - b"mix digest": serialize_hash(cmix), - b"result": serialize_hash(ethash_sha3_256(s + cmix)), + b"mix digest": cmix.tobytes(), + b"result": ethash_sha3_256(s_cmix).tobytes(), } -def hashimoto_light( - full_size: int, cache: List[List[int]], header: bytes, nonce: bytes -) -> Dict: - return hashimoto(header, nonce, full_size, lambda x: calc_dataset_item(cache, x)) - - -def hashimoto_full(dataset: List[List[int]], header: bytes, nonce: bytes) -> Dict: +def hashimoto_full(dataset: np.ndarray, header: bytes, nonce: bytes) -> Dict: return hashimoto(header, nonce, len(dataset) * HASH_BYTES, lambda x: dataset[x]) diff --git a/ethereum/pow/ethash_cy.pyx b/ethereum/pow/ethash_cy.pyx new file mode 100644 index 000000000..39d2c4585 --- /dev/null +++ b/ethereum/pow/ethash_cy.pyx @@ -0,0 +1,229 @@ +# cython: language_level=3 +# cython: boundscheck=False +# cython: wraparound=False +# cython: cdivision=True +# cython: initializedcheck=False +""" +Cython-accelerated Ethash routines. + +R3 — ``mix_parents``: inner loop of calc_dataset_item (256-iter FNV mixing). +R4 — ``cy_calc_dataset_item``, ``cy_hashimoto_light``: full functions that + call C keccak directly, eliminating all Python-layer overhead for hashing. 
+""" + +import numpy as np +cimport numpy as cnp +cimport cython +from libc.stdint cimport uint8_t, uint32_t, uint64_t +from libc.string cimport memcpy, memset + +cnp.import_array() + +# ---------- C keccak (keccak_tiny.c, linked at build time) ---------- +cdef extern from "keccak_tiny.h": + int keccak_256(uint8_t* out, size_t outlen, + const uint8_t* inp, size_t inlen) nogil + int keccak_512(uint8_t* out, size_t outlen, + const uint8_t* inp, size_t inlen) nogil + +# ---------- Ethash constants ---------- +cdef uint32_t FNV_PRIME = 0x01000193u + +cdef enum: + _DATASET_PARENTS = 256 + _R = 16 # HASH_BYTES / WORD_BYTES + _HASH_BYTES = 64 + _MIX_BYTES = 128 + _ACCESSES = 64 + _MIX_WORDS = 32 # MIX_BYTES / WORD_BYTES + _MIX_HASHES = 2 # MIX_BYTES / HASH_BYTES + _CACHE_ROUNDS = 3 + +# ---------- Inline C helpers ---------- + +cdef inline void _keccak_512_u32(uint32_t* out, const uint32_t* inp) noexcept nogil: + """keccak-512: 64 bytes in → 16 uint32 out.""" + keccak_512(out, 64, inp, 64) + +cdef inline void _keccak_512_bytes(uint32_t* out, + const uint8_t* inp, + size_t inlen) noexcept nogil: + """keccak-512: arbitrary bytes in → 16 uint32 out.""" + keccak_512(out, 64, inp, inlen) + +cdef inline void _keccak_256_u32(uint32_t* out, + const uint32_t* inp, + size_t n_u32) noexcept nogil: + """keccak-256: n_u32 uint32 words in → 8 uint32 out.""" + keccak_256(out, 32, inp, n_u32 * 4) + + +# ===================================================================== +# cy_mkcache — build ethash cache using C keccak +# ===================================================================== + +def cy_mkcache(const uint8_t[::1] seed, Py_ssize_t n): + """Build ethash cache: n rows of 16 uint32, using C keccak-512. + + Parameters + ---------- + seed : bytes (as uint8 array) + 32-byte seed for this epoch. + n : int + Number of cache rows (cache_size // HASH_BYTES). + + Returns + ------- + numpy ndarray of shape (n, 16), dtype uint32. 
+ """ + result = np.empty((n, 16), dtype=np.uint32) + cdef uint32_t[:, ::1] o = result + cdef uint32_t* ptr = &o[0, 0] + cdef Py_ssize_t i, rnd + cdef uint32_t v + cdef uint32_t xored[16] + + # o[0] = keccak_512(seed) + keccak_512(ptr, 64, &seed[0], seed.shape[0]) + + # o[i] = keccak_512(o[i-1]) + for i in range(1, n): + _keccak_512_u32(&ptr[i * _R], &ptr[(i - 1) * _R]) + + # CACHE_ROUNDS of RandMemoHash + for rnd in range(_CACHE_ROUNDS): + for i in range(n): + v = ptr[i * _R] % n + # xored = o[(i-1+n) % n] ^ o[v] + for k in range(_R): + xored[k] = ptr[(((i - 1 + n) % n) * _R) + k] ^ ptr[(v * _R) + k] + _keccak_512_u32(&ptr[i * _R], xored) + + return result + + +# ===================================================================== +# R3 — mix_parents (kept for backward compatibility) +# ===================================================================== + +@cython.boundscheck(False) +@cython.wraparound(False) +def mix_parents(uint32_t[::1] mix, + const uint32_t[:, ::1] cache, + uint64_t i): + """In-place parent mixing for one dataset item (R3 API).""" + cdef Py_ssize_t n = cache.shape[0] + cdef Py_ssize_t j, k + cdef uint32_t cache_index, mix_word + cdef uint32_t i32 = i + + for j in range(_DATASET_PARENTS): + mix_word = mix[j % _R] + cache_index = ((i32 ^ j) * FNV_PRIME) ^ mix_word + cache_index = cache_index % n + for k in range(_R): + mix[k] = (mix[k] * FNV_PRIME) ^ cache[cache_index, k] + + +# ===================================================================== +# R4 — full calc_dataset_item + hashimoto_light in C/Cython +# ===================================================================== + +cdef inline void _calc_dataset_item(uint32_t* out, + const uint32_t* cache, + Py_ssize_t n, + uint32_t idx) noexcept nogil: + """Pure C calc_dataset_item. 
Writes 16 uint32 to *out*.""" + cdef uint32_t mix[16] + cdef Py_ssize_t j, k + cdef uint32_t cache_index, mix_word + + # mix = cache[idx % n]; mix[0] ^= idx + memcpy(mix, &cache[(idx % n) * _R], 64) + mix[0] ^= idx + # mix = keccak_512(mix) + _keccak_512_u32(mix, mix) + # parent mixing + for j in range(_DATASET_PARENTS): + mix_word = mix[j % _R] + cache_index = ((idx ^ j) * FNV_PRIME) ^ mix_word + cache_index = cache_index % n + for k in range(_R): + mix[k] = (mix[k] * FNV_PRIME) ^ cache[(cache_index * _R) + k] + # mix = keccak_512(mix) + _keccak_512_u32(out, mix) + + +def cy_calc_dataset_item(const uint32_t[:, ::1] cache, uint32_t i): + """Python-callable calc_dataset_item (R4). Returns ndarray uint32[16].""" + cdef Py_ssize_t n = cache.shape[0] + result = np.empty(16, dtype=np.uint32) + cdef uint32_t[::1] result_view = result + _calc_dataset_item(&result_view[0], &cache[0, 0], n, i) + return result + + +def cy_hashimoto_light(Py_ssize_t full_size, + const uint32_t[:, ::1] cache, + const uint8_t[::1] header, + const uint8_t[::1] nonce): + """Full hashimoto_light in Cython+C (R4). 
+ + Returns dict identical to the Python version: + {b"mix digest": bytes(32), b"result": bytes(32)} + """ + cdef Py_ssize_t n = full_size // _HASH_BYTES + cdef Py_ssize_t i, j, k, p + cdef uint32_t s0 + cdef uint32_t s[16] + cdef uint32_t mix[_MIX_WORDS] # 32 uint32 + cdef uint32_t newdata[_MIX_WORDS] + cdef uint32_t cmix[8] + cdef uint32_t s_cmix[24] # s(16) + cmix(8) + cdef uint32_t result_hash[8] + cdef Py_ssize_t cache_n = cache.shape[0] + cdef const uint32_t* cache_ptr = &cache[0, 0] + + # nonce_rev = nonce[::-1] + cdef Py_ssize_t header_len = header.shape[0] + cdef Py_ssize_t nonce_len = nonce.shape[0] + cdef uint8_t seed_buf[128] # header (up to ~80) + nonce (8) + memcpy(seed_buf, &header[0], header_len) + # reverse nonce + for i in range(nonce_len): + seed_buf[header_len + i] = nonce[nonce_len - 1 - i] + + # s = keccak_512(header + nonce[::-1]) + _keccak_512_bytes(s, seed_buf, header_len + nonce_len) + + # mix = tile(s, 2) + memcpy(mix, s, 64) + memcpy(&mix[16], s, 64) + + s0 = s[0] + + for i in range(_ACCESSES): + p = (((i ^ s0) * FNV_PRIME) ^ mix[i % _MIX_WORDS]) + p = (p % (n // _MIX_HASHES)) * _MIX_HASHES + for j in range(_MIX_HASHES): + _calc_dataset_item(&newdata[j * _R], cache_ptr, cache_n, (p + j)) + for k in range(_MIX_WORDS): + mix[k] = (mix[k] * FNV_PRIME) ^ newdata[k] + + # compress mix → cmix (8 uint32) + for i in range(8): + cmix[i] = mix[i * 4] + cmix[i] = (cmix[i] * FNV_PRIME) ^ mix[i * 4 + 1] + cmix[i] = (cmix[i] * FNV_PRIME) ^ mix[i * 4 + 2] + cmix[i] = (cmix[i] * FNV_PRIME) ^ mix[i * 4 + 3] + + # result = keccak_256(s + cmix) + memcpy(s_cmix, s, 64) + memcpy(&s_cmix[16], cmix, 32) + _keccak_256_u32(result_hash, s_cmix, 24) + + # Return as Python dict with bytes values + return { + b"mix digest": (cmix)[:32], + b"result": (result_hash)[:32], + } diff --git a/ethereum/pow/ethash_utils.py b/ethereum/pow/ethash_utils.py index fa6dd0dac..b482b14e6 100644 --- a/ethereum/pow/ethash_utils.py +++ b/ethereum/pow/ethash_utils.py @@ -1,29 +1,16 @@ 
-from typing import List, Union +from typing import Union +from Crypto.Hash import keccak -from eth_utils import encode_hex, decode_hex +import numpy as np -try: - from Crypto.Hash import keccak - def _sha3_256(x): - return keccak.new(digest_bits=256, data=x).digest() +def _sha3_256(x): + return keccak.new(digest_bits=256, data=x).digest() - def _sha3_512(x): - return keccak.new(digest_bits=512, data=x).digest() +def _sha3_512(x): + return keccak.new(digest_bits=512, data=x).digest() -except Exception: - import sha3 as _sha3 - - def _sha3_256(x): - return _sha3.sha3_256(x).digest() - - def _sha3_512(x): - return _sha3.sha3_512(x).digest() - - -import sys - WORD_BYTES = 4 # bytes in word DATASET_BYTES_INIT = 2 ** 30 # bytes in dataset at genesis DATASET_BYTES_GROWTH = 2 ** 23 # growth per epoch (~7 GB per year) @@ -39,73 +26,18 @@ def _sha3_512(x): FNV_PRIME = 0x01000193 -def fnv(v1, v2): - return (v1 * FNV_PRIME ^ v2) % 2 ** 32 - - -# Assumes little endian bit ordering (same as Intel architectures) -def decode_int(s): - return int(encode_hex(s[::-1]), 16) if s else 0 - - -def encode_int(s): - a = "%x" % s - return b"" if s == 0 else decode_hex("0" * (len(a) % 2) + a)[::-1] - - -def zpad(s, length): - return s + b"\x00" * max(0, length - len(s)) - - -def serialize_hash(h: List[int]) -> bytes: - return b"".join([zpad(encode_int(x), 4) for x in h]) - - -def deserialize_hash(h: bytes) -> List[int]: - return [decode_int(h[i : i + WORD_BYTES]) for i in range(0, len(h), WORD_BYTES)] - - -def hash_words(h, sz, x) -> List[int]: - if isinstance(x, list): - x = serialize_hash(x) - y = h(x) - return deserialize_hash(y) - - -def to_bytes(x): - if sys.version_info.major > 2 and isinstance(x, str): - x = bytes(x, "utf-8") - return x - - -def xor(a, b): - return a ^ b - - -# sha3 hash function, outputs 64 bytes -def ethash_sha3_512(x: Union[bytes, List[int]]) -> List[int]: - return hash_words(lambda v: _sha3_512(to_bytes(v)), 64, x) - - -def ethash_sha3_256(x: Union[bytes, 
List[int]]) -> List[int]: - return hash_words(lambda v: _sha3_256(to_bytes(v)), 32, x) - - -# Works for dataset and cache -def serialize_cache(ds): - return b"".join([serialize_hash(h) for h in ds]) - - -serialize_dataset = serialize_cache - - -def deserialize_cache(ds): - return [ - deserialize_hash(ds[i : i + HASH_BYTES]) for i in range(0, len(ds), HASH_BYTES) - ] +def ethash_sha3_512(x: Union[bytes, np.ndarray]) -> np.ndarray: + """sha3-512: bytes or ndarray in, uint32 ndarray (16,) out.""" + if isinstance(x, np.ndarray): + x = x.tobytes() + return np.frombuffer(_sha3_512(x), dtype=np.uint32).copy() -deserialize_dataset = deserialize_cache +def ethash_sha3_256(x: Union[bytes, np.ndarray]) -> np.ndarray: + """sha3-256: bytes or ndarray in, uint32 ndarray (8,) out.""" + if isinstance(x, np.ndarray): + x = x.tobytes() + return np.frombuffer(_sha3_256(x), dtype=np.uint32).copy() def isprime(x): diff --git a/ethereum/pow/ethpow.py b/ethereum/pow/ethpow.py index 5493d53d9..afb74359f 100644 --- a/ethereum/pow/ethpow.py +++ b/ethereum/pow/ethpow.py @@ -1,64 +1,24 @@ -import warnings +from typing import Tuple, Optional from functools import lru_cache -from typing import Tuple, Optional, List, Union - -from eth_utils import big_endian_to_int from ethereum.pow import ethash from ethereum.pow.ethash_utils import get_full_size, get_cache_size, EPOCH_LENGTH -try: - import pyethash - - ETHASH_LIB = "pyethash" # the C++ based implementation -except ImportError: - ETHASH_LIB = "ethash" - warnings.warn("using pure python implementation", ImportWarning) - -# always have python implementation declared -def get_cache_slow(cache_size: int, block_number: int) -> List[List[int]]: +def get_cache(cache_size: int, block_number: int): return ethash.mkcache(cache_size, block_number) -def hashimoto_slow( +def hashimoto( block_number: int, full_size: int, - cache: Union[List[List[int]], bytes], + cache, mining_hash: bytes, bin_nonce: bytes, ): return ethash.hashimoto_light(full_size, cache, 
mining_hash, bin_nonce) -if ETHASH_LIB == "ethash": - get_cache = get_cache_slow - hashimoto = hashimoto_slow -elif ETHASH_LIB == "pyethash": - - @lru_cache(10) - def calculate_cache(n): - return pyethash.mkcache_bytes(n * EPOCH_LENGTH) - - def get_cache(cache_size: int, block_number: int): - return calculate_cache(block_number // EPOCH_LENGTH) - - def hashimoto( - block_number: int, - full_size: int, - cache: Union[List[List[int]], bytes], - mining_hash: bytes, - bin_nonce: bytes, - ): - return pyethash.hashimoto_light( - block_number, cache, mining_hash, big_endian_to_int(bin_nonce) - ) - - -else: - raise Exception("invalid ethash library set") - - @lru_cache(maxsize=32) def check_pow( block_number, header_hash, mixhash, nonce, difficulty, is_test=False @@ -67,20 +27,16 @@ def check_pow( if len(mixhash) != 32 or len(header_hash) != 32 or len(nonce) != 8: return False - cache_gen, mining_gen = get_cache, hashimoto if is_test: cache_size, full_size = 1024, 32 * 1024 - # use python implementation to allow overriding cache & dataset size - cache_gen = get_cache_slow - mining_gen = hashimoto_slow else: cache_size, full_size = ( get_cache_size(block_number), get_full_size(block_number), ) - cache = cache_gen(cache_size, block_number) - mining_output = mining_gen(block_number, full_size, cache, header_hash, nonce) + cache = get_cache(cache_size, block_number) + mining_output = hashimoto(block_number, full_size, cache, header_hash, nonce) if mining_output[b"mix digest"] != mixhash: return False result = int.from_bytes(mining_output[b"result"], byteorder="big") @@ -125,25 +81,20 @@ def mine( rounds: int = 1000, is_test: bool = False, ) -> Tuple[Optional[bytes], Optional[bytes]]: - cache_gen, mining_gen = get_cache, hashimoto if is_test: cache_size, full_size = 1024, 32 * 1024 - # use python implementation to allow overriding cache & dataset size - cache_gen = get_cache_slow - mining_gen = hashimoto_slow else: cache_size, full_size = ( get_cache_size(block_number), 
get_full_size(block_number), ) - cache = cache_gen(cache_size, block_number) + cache = get_cache(cache_size, block_number) nonce = start_nonce target = (2 ** 256 // (difficulty or 1) - 1).to_bytes(32, byteorder="big") for i in range(1, rounds + 1): - # hashimoto expected big-indian byte representation bin_nonce = (nonce + i).to_bytes(8, byteorder="big") - o = mining_gen(block_number, full_size, cache, mining_hash, bin_nonce) + o = hashimoto(block_number, full_size, cache, mining_hash, bin_nonce) if o[b"result"] <= target: assert len(bin_nonce) == 8 assert len(o[b"mix digest"]) == 32 diff --git a/ethereum/pow/keccak_tiny.c b/ethereum/pow/keccak_tiny.c new file mode 100644 index 000000000..a0f56a4cb --- /dev/null +++ b/ethereum/pow/keccak_tiny.c @@ -0,0 +1,175 @@ +/** libkeccak-tiny + * + * A single-file implementation of SHA-3 and SHAKE. + * Copied from https://github.com/coruus/keccak-tiny + * + * Implementor: David Leon Gil + * License: CC0, attribution kindly requested. Blame taken too, + * but not liability. + * + * Local modifications for Ethash: + * 1. Added keccak_256/keccak_512 (original Keccak padding 0x01, + * as required by Ethash — distinct from NIST SHA-3 padding 0x06). + * 2. Replaced memset_s with portable secure_zero (volatile memset) + * for MSVC/GCC/Clang compatibility. + */ +#include "keccak_tiny.h" + +#include +#include + +/******** Portable secure memset ********/ +static void secure_zero(void *p, size_t len) { + volatile uint8_t *v = (volatile uint8_t *)p; + while (len--) *v++ = 0; +} + +/******** The Keccak-f[1600] permutation ********/ + +/*** Constants. 
***/ +static const uint8_t rho[24] = + { 1, 3, 6, 10, 15, 21, + 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, + 62, 18, 39, 61, 20, 44}; +static const uint8_t pi[24] = + {10, 7, 11, 17, 18, 3, + 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, + 20, 14, 22, 9, 6, 1}; +static const uint64_t RC[24] = + {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL, + 0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL}; + +/*** Helper macros to unroll the permutation. ***/ +#define rol(x, s) (((x) << s) | ((x) >> (64 - s))) +#define REPEAT6(e) e e e e e e +#define REPEAT24(e) REPEAT6(e e e e) +#define REPEAT5(e) e e e e e +#define FOR5(v, s, e) \ + v = 0; \ + REPEAT5(e; v += s;) + +/*** Keccak-f[1600] ***/ +static inline void keccakf(void* state) { + uint64_t* a = (uint64_t*)state; + uint64_t b[5] = {0}; + uint64_t t = 0; + uint8_t x, y; + + for (int i = 0; i < 24; i++) { + // Theta + FOR5(x, 1, + b[x] = 0; + FOR5(y, 5, + b[x] ^= a[x + y]; )) + FOR5(x, 1, + FOR5(y, 5, + a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); )) + // Rho and pi + t = a[1]; + x = 0; + REPEAT24(b[0] = a[pi[x]]; + a[pi[x]] = rol(t, rho[x]); + t = b[0]; + x++; ) + // Chi + FOR5(y, + 5, + FOR5(x, 1, + b[x] = a[y + x];) + FOR5(x, 1, + a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); )) + // Iota + a[0] ^= RC[i]; + } +} + +/******** The FIPS202-defined functions. ********/ + +/*** Some helper macros. 
***/ + +#define _(S) do { S } while (0) +#define FOR(i, ST, L, S) \ + _(for (size_t i = 0; i < L; i += ST) { S; }) +#define mkapply_ds(NAME, S) \ + static inline void NAME(uint8_t* dst, \ + const uint8_t* src, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } +#define mkapply_sd(NAME, S) \ + static inline void NAME(const uint8_t* src, \ + uint8_t* dst, \ + size_t len) { \ + FOR(i, 1, len, S); \ + } + +mkapply_ds(xorin, dst[i] ^= src[i]) // xorin +mkapply_sd(setout, dst[i] = src[i]) // setout + +#define P keccakf +#define Plen 200 + +// Fold P*F over the full blocks of an input. +#define foldP(I, L, F) \ + while (L >= rate) { \ + F(a, I, rate); \ + P(a); \ + I += rate; \ + L -= rate; \ + } + +/** The sponge-based hash construction. **/ +static inline int hash(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen, + size_t rate, uint8_t delim) { + if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) { + return -1; + } + uint8_t a[Plen] = {0}; + // Absorb input. + foldP(in, inlen, xorin); + // Xor in the DS and pad frame. + a[inlen] ^= delim; + a[rate - 1] ^= 0x80; + // Xor in the last block. + xorin(a, in, inlen); + // Apply P + P(a); + // Squeeze output. 
+ foldP(out, outlen, setout); + setout(a, out, outlen); + secure_zero(a, 200); + return 0; +} + +/* Original Keccak (padding 0x01) — used by Ethash */ +int keccak_256(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen) { + if (outlen > 32) return -1; + return hash(out, outlen, in, inlen, 200 - (256 / 4), 0x01); +} + +int keccak_512(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen) { + if (outlen > 64) return -1; + return hash(out, outlen, in, inlen, 200 - (512 / 4), 0x01); +} + +/* FIPS-202 SHA-3 (padding 0x06) */ +int sha3_256(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen) { + if (outlen > 32) return -1; + return hash(out, outlen, in, inlen, 200 - (256 / 4), 0x06); +} + +int sha3_512(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen) { + if (outlen > 64) return -1; + return hash(out, outlen, in, inlen, 200 - (512 / 4), 0x06); +} diff --git a/ethereum/pow/keccak_tiny.h b/ethereum/pow/keccak_tiny.h new file mode 100644 index 000000000..cc5d1628b --- /dev/null +++ b/ethereum/pow/keccak_tiny.h @@ -0,0 +1,24 @@ +/** libkeccak-tiny + * + * Copied from https://github.com/coruus/keccak-tiny + * See keccak_tiny.c for local modifications. 
+ */ +#ifndef KECCAK_TINY_H +#define KECCAK_TINY_H + +#include +#include + +/* Original Keccak (padding 0x01) — used by Ethash */ +int keccak_256(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen); +int keccak_512(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen); + +/* FIPS-202 SHA-3 (padding 0x06) */ +int sha3_256(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen); +int sha3_512(uint8_t* out, size_t outlen, + const uint8_t* in, size_t inlen); + +#endif /* KECCAK_TINY_H */ diff --git a/ethereum/pow/tests/bench_hashimoto.py b/ethereum/pow/tests/bench_hashimoto.py new file mode 100644 index 000000000..23f72ecff --- /dev/null +++ b/ethereum/pow/tests/bench_hashimoto.py @@ -0,0 +1,61 @@ +"""Benchmark: pyethash.hashimoto_light (C++) vs ethash.hashimoto_light (pure Python). + +Run with: + python -m ethereum.pow.tests.bench_hashimoto +or: + python ethereum/pow/tests/bench_hashimoto.py + +Uses real mainnet parameters (block 0: cache ~16MB, full_size ~1GB) so that +pure Python and pyethash C++ are compared under identical conditions. +Note: pyethash may not be supported on Python 3.13, so this benchmark may fail to run on that version. 
+See https://github.com/QuarkChain/pyquarkchain/issues/976 +""" + +import timeit + +from eth_utils import big_endian_to_int + +from ethereum.pow.ethash import mkcache, hashimoto_light as py_hashimoto_light +from ethereum.pow.ethash_utils import get_cache_size, get_full_size + +BLOCK_NUMBER = 0 +HEADER = bytes(32) +NONCE = (0).to_bytes(8, byteorder="big") +ROUNDS = 100 + +# Use real mainnet parameters to match pyethash internal sizes +CACHE_SIZE = get_cache_size(BLOCK_NUMBER) +FULL_SIZE = get_full_size(BLOCK_NUMBER) + +cache = mkcache(CACHE_SIZE, BLOCK_NUMBER) + + +def bench_python(): + py_hashimoto_light(FULL_SIZE, cache, HEADER, NONCE) + + +results = {} + +elapsed = timeit.timeit(bench_python, number=ROUNDS) +results["pure Python"] = elapsed +print(f"Cache size: {CACHE_SIZE} bytes, Full size: {FULL_SIZE} bytes") +print(f"pure Python : {elapsed:.3f}s for {ROUNDS} calls ({elapsed/ROUNDS*1000:.1f} ms/call)") + +try: + import pyethash + + cpp_cache = pyethash.mkcache_bytes(BLOCK_NUMBER) + nonce_int = big_endian_to_int(NONCE) + print(f"pyethash cache size: {len(cpp_cache)} bytes") + + def bench_cpp(): + pyethash.hashimoto_light(BLOCK_NUMBER, cpp_cache, HEADER, nonce_int) + + elapsed_cpp = timeit.timeit(bench_cpp, number=ROUNDS) + results["pyethash (C++)"] = elapsed_cpp + print(f"pyethash C++: {elapsed_cpp:.3f}s for {ROUNDS} calls ({elapsed_cpp/ROUNDS*1000:.1f} ms/call)") + print(f"Speedup : {results['pure Python'] / elapsed_cpp:.1f}x") + +except ImportError: + print("pyethash not installed — skipping C++ comparison.") + print("Install with: pip install pyethash") diff --git a/ethereum/pow/tests/bench_hashimoto_compare.py b/ethereum/pow/tests/bench_hashimoto_compare.py new file mode 100644 index 000000000..25f070451 --- /dev/null +++ b/ethereum/pow/tests/bench_hashimoto_compare.py @@ -0,0 +1,421 @@ +""" +Benchmark suite: old (hex-based) vs R1 (struct+list) vs R2 (numpy) vs R3 (numpy+Cython) vs R4 (full Cython). 
+ +- old: original hex-based implementation +- R1: struct.pack/unpack + Python list +- R2: struct.pack/unpack + numpy ndarray +- R3: R2 + Cython inner loop for calc_dataset_item (256-iter FNV mixing) +- R4: full Cython + C keccak (no Python overhead in hot path) + +Sections: + 1. mkcache build time + 2. Correctness assertions + 3. Primitive micro-benchmarks (serialize/fnv/sha3) + 4. calc_dataset_item throughput + 5. hashimoto_light throughput + 6. check_pow end-to-end + +Uses is_test=True sizes (cache=1024B, dataset=32KB) for fast iteration. +Run with: + PYTHONPATH=. python -m ethereum.pow.tests.bench_hashimoto_compare +""" +import copy +import struct +import time + +import numpy as np +from Crypto.Hash import keccak + +from . import old_ethash + +# --------------------------------------------------------------------------- +# Shared keccak +# --------------------------------------------------------------------------- +def _sha3_256(x): return keccak.new(digest_bits=256, data=x).digest() +def _sha3_512(x): return keccak.new(digest_bits=512, data=x).digest() + +WORD_BYTES = 4 +HASH_BYTES = 64 +MIX_BYTES = 128 +ACCESSES = 64 +DATASET_PARENTS = 256 +CACHE_ROUNDS = 3 +FNV_PRIME = 0x01000193 + +# =========================================================================== +# OLD — hex-based (imported from old_ethash.py, which is a copy of the original ethash.py) +# =========================================================================== +old_mkcache = old_ethash.mkcache +old_calc_dataset_item = old_ethash.calc_dataset_item +old_hashimoto_light = old_ethash.hashimoto_light +_old_fnv = old_ethash.fnv +_old_serialize_hash = old_ethash.serialize_hash +_old_deserialize_hash = old_ethash.deserialize_hash +_old_sha3_512 = old_ethash.sha3_512 +_old_sha3_256 = old_ethash.sha3_256 + +# =========================================================================== +# Round 1 — struct+list +# =========================================================================== +_FMT_16I = 
struct.Struct("<16I")
_FMT_8I = struct.Struct("<8I")
_FMT_32I = struct.Struct("<32I")
_MID_FNV_PRIME = FNV_PRIME

def _r1_serialize(h):
    # Word list -> little-endian bytes; the three common widths (16/8/32
    # words) hit pre-compiled Struct fast paths.
    n = len(h)
    if n == 16: return _FMT_16I.pack(*h)
    if n == 8: return _FMT_8I.pack(*h)
    if n == 32: return _FMT_32I.pack(*h)
    return struct.pack("<%dI" % n, *h)

def _r1_deserialize(h):
    # Inverse of _r1_serialize: bytes -> list of uint32 (little-endian).
    n = len(h)
    if n == 64: return list(_FMT_16I.unpack(h))
    if n == 32: return list(_FMT_8I.unpack(h))
    if n == 128: return list(_FMT_32I.unpack(h))
    return list(struct.unpack("<%dI" % (n // 4), h))

def _r1_sha3_512(x):
    # keccak-512 over bytes (word lists are serialized first); returns 16 words.
    if isinstance(x, list): x = _r1_serialize(x)
    return list(_FMT_16I.unpack(_sha3_512(x)))

def _r1_sha3_256(x):
    # keccak-256; returns 8 words.
    if isinstance(x, list): x = _r1_serialize(x)
    return list(_FMT_8I.unpack(_sha3_256(x)))

def _r1_fnv(v1, v2):
    # FNV-style mix with explicit uint32 wraparound.
    return (v1 * _MID_FNV_PRIME ^ v2) & 0xFFFFFFFF

def r1_mkcache(cache_size, seed):
    # Ethash cache: sequential keccak-512 chain over the seed, then
    # CACHE_ROUNDS passes of xor-and-rehash over pseudo-random row pairs.
    n = cache_size // HASH_BYTES
    o = [_r1_sha3_512(seed)]
    for i in range(1, n):
        o.append(_r1_sha3_512(o[-1]))
    for _ in range(CACHE_ROUNDS):
        for i in range(n):
            v = o[i][0] % n
            o[i] = _r1_sha3_512([a ^ b for a, b in zip(o[(i - 1 + n) % n], o[v])])
    return o

def r1_calc_dataset_item(cache, i):
    # One 64-byte dataset row: seed from cache row i, then fold in
    # DATASET_PARENTS pseudo-randomly chosen cache rows via FNV.
    n = len(cache)
    r = HASH_BYTES // WORD_BYTES
    mix = copy.copy(cache[i % n])
    mix[0] ^= i
    mix = _r1_sha3_512(mix)
    for j in range(DATASET_PARENTS):
        cache_index = _r1_fnv(i ^ j, mix[j % r])
        mix = list(map(_r1_fnv, mix, cache[cache_index % n]))
    return _r1_sha3_512(mix)

def r1_hashimoto_light(full_size, cache, header, nonce):
    # Hashimoto with dataset rows computed on the fly from the cache.
    # Returns {b"mix digest": bytes, b"result": bytes}.
    n = full_size // HASH_BYTES
    w = MIX_BYTES // WORD_BYTES
    mixhashes = MIX_BYTES // HASH_BYTES
    s = _r1_sha3_512(header + nonce[::-1])
    mix = list(s) * mixhashes
    for i in range(ACCESSES):
        p = _r1_fnv(i ^ s[0], mix[i % w]) % (n // mixhashes) * mixhashes
        newdata = []
        for j in range(mixhashes):
            newdata.extend(r1_calc_dataset_item(cache, p + j))
        mix = list(map(_r1_fnv, mix, newdata))
    # Compress the 32-word mix 4:1 into the 8-word mix digest.
    cmix = []
    for i in range(0, len(mix), 4):
        cmix.append(_r1_fnv(_r1_fnv(_r1_fnv(mix[i], mix[i+1]), mix[i+2]), mix[i+3]))
    return {
        b"mix digest": _r1_serialize(cmix),
        b"result": _r1_serialize(_r1_sha3_256(s + cmix)),
    }

# ===========================================================================
# Round 2 — numpy ndarray (current implementation)
# ===========================================================================
# uint32 overflow in the FNV multiply is intentional; silence the warning.
np.seterr(over="ignore")

from ethereum.pow.ethash import (
    mkcache as r2_mkcache,
    hashimoto as _r2_hashimoto,
)
from ethereum.pow.ethash_utils import ethash_sha3_512 as _r2_sha3_512

_R2_FNV_PRIME = np.uint32(FNV_PRIME)


def r2_calc_dataset_item(cache: np.ndarray, i: int) -> np.ndarray:
    """R2: pure-Python numpy calc_dataset_item."""
    n = len(cache)
    r = HASH_BYTES // WORD_BYTES  # 16
    mix = cache[i % n].copy()
    mix[0] ^= i
    mix = _r2_sha3_512(mix)
    for j in range(DATASET_PARENTS):
        # int(...) lifts the numpy scalar to a Python int before the multiply;
        # the mask re-applies uint32 wraparound.
        cache_index = ((i ^ j) * FNV_PRIME ^ int(mix[j % r])) & 0xFFFFFFFF
        # Vectorized FNV over all 16 words at once (wraps modulo 2**32).
        mix *= _R2_FNV_PRIME
        mix ^= cache[cache_index % n]
    return _r2_sha3_512(mix)

def r2_hashimoto_light(full_size, cache, header, nonce):
    """R2: pure-Python hashimoto_light (numpy + pycryptodome keccak)."""
    return _r2_hashimoto(header, nonce, full_size, lambda x: r2_calc_dataset_item(cache, x))


# ===========================================================================
# Round 3 — numpy + Cython mix_parents
# ===========================================================================
try:
    from ethereum.pow.ethash_cy import mix_parents as _cy_mix_parents
    _has_cython = True
except ImportError:
    _has_cython = False


def r3_calc_dataset_item(cache, i):
    """R3: Cython inner loop (mix_parents only, Python sha3)."""
    n = len(cache)
    mix = cache[i % n].copy()
    mix[0] ^= i
    mix = _r2_sha3_512(mix)
    # Relies on mix_parents updating `mix` in place — its return value is
    # unused here.
    _cy_mix_parents(mix, cache, i)
    return _r2_sha3_512(mix)


def r3_hashimoto_light(full_size, cache, header, nonce):
    """R3: hashimoto using r3_calc_dataset_item (Cython mix_parents + 
Python sha3).""" + return _r2_hashimoto(header, nonce, full_size, lambda x: r3_calc_dataset_item(cache, x)) + + +# =========================================================================== +# Round 4 — full Cython + C keccak (no Python overhead in hot path) +# =========================================================================== +try: + from ethereum.pow.ethash_cy import ( + cy_calc_dataset_item as r4_calc_dataset_item, + cy_hashimoto_light as _r4_hashimoto_light_raw, + ) + _has_r4 = True +except ImportError: + _has_r4 = False + + +def r4_hashimoto_light(full_size, cache, header, nonce): + """R4: full Cython hashimoto_light. Adapts bytes args to uint8 arrays.""" + return _r4_hashimoto_light_raw( + full_size, cache, + np.frombuffer(header, dtype=np.uint8), + np.frombuffer(nonce, dtype=np.uint8), + ) + +# =========================================================================== +# Micro-benchmark helpers +# =========================================================================== +def _bench(func, args, rounds=200_000): + for _ in range(1000): + func(*args) + t0 = time.perf_counter() + for _ in range(rounds): + func(*args) + return time.perf_counter() - t0 + +def _row3(label, fns_and_args, N): + times = [_bench(fn, args, N) for fn, args in fns_and_args] + t0 = times[0] + cols = "".join(f"{t:>10.4f}" for t in times) + ratios = "".join(f"{t0/t:>8.1f}x" for t in times[1:]) + print(f"{label:<30}{cols}{ratios}") + +def _row_partial(label, fns_and_args, N): + times = [_bench(fn, args, N) if fn is not None else None + for fn, args in fns_and_args] + t0 = times[0] + cols = "".join(f"{t:>10.4f}" if t is not None else f"{'N/A':>10}" for t in times) + ratios = "".join( + f"{t0/t:>8.1f}x" if t is not None else f"{'N/A':>8}" + for t in times[1:] + ) + print(f"{label:<30}{cols}{ratios}") + +# =========================================================================== +# Main +# =========================================================================== +if __name__ == 
"__main__":
    # Test-sized parameters (cache=1KB, dataset=32KB) for fast iteration.
    CACHE_SIZE = 1024
    FULL_SIZE = 32 * 1024
    SEED = b"\x00" * 32
    HEADER = bytes.fromhex("c9149cc0386e689d789a1c2f3d5d169a61a6218ed30e74414dc736e442ef3d1f")
    NONCE = (0).to_bytes(8, byteorder="big")

    # ---- build caches ----
    print("Building caches...")
    t0 = time.perf_counter(); old_cache = old_mkcache(CACHE_SIZE, SEED); t_oc = time.perf_counter() - t0
    t0 = time.perf_counter(); r1_cache = r1_mkcache(CACHE_SIZE, SEED); t_mc = time.perf_counter() - t0
    # r2_mkcache takes a block number (0), not a seed — it derives the seed itself.
    t0 = time.perf_counter(); r2_cache = r2_mkcache(CACHE_SIZE, 0); t_nc = time.perf_counter() - t0
    print(f" mkcache old={t_oc*1000:.1f}ms R1={t_mc*1000:.1f}ms R2={t_nc*1000:.1f}ms "
          f"old/R1={t_oc/t_mc:.1f}x old/R2={t_oc/t_nc:.1f}x")

    # ---- correctness ----
    # All implementations must agree before any timing is reported; a
    # mismatch aborts the run via AssertionError.
    old_r = old_hashimoto_light(FULL_SIZE, old_cache, HEADER, NONCE)
    mid_r = r1_hashimoto_light(FULL_SIZE, r1_cache, HEADER, NONCE)
    new_r = r2_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE)
    assert old_r == mid_r, "old/R1 MISMATCH"
    assert old_r == new_r, "old/R2 MISMATCH"

    if _has_cython:
        for i in range(16):
            r2_item = r2_calc_dataset_item(r2_cache, i)
            r3_item = r3_calc_dataset_item(r2_cache, i)
            assert np.array_equal(r2_item, r3_item), f"R2/R3 mismatch at item {i}"
    if _has_r4:
        for i in range(16):
            r2_item = r2_calc_dataset_item(r2_cache, i)
            r4_item = r4_calc_dataset_item(r2_cache, i)
            assert np.array_equal(r2_item, r4_item), f"R2/R4 mismatch at item {i}"
        r4_r = r4_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE)
        assert old_r == r4_r, "old/R4 hashimoto MISMATCH"
    cy_tag = "OK" if _has_cython else "SKIP"
    r4_tag = "OK" if _has_r4 else "SKIP"
    print(f" result match=OK R3={cy_tag} R4={r4_tag} mix={old_r[b'mix digest'].hex()[:16]}...\n")

    # ---- calc_dataset_item breakdown ----
    N2 = 300
    print(f"calc_dataset_item x{N2} calls")

    t0 = time.perf_counter()
    for i in range(N2): old_calc_dataset_item(old_cache, i)
    t_old_i = time.perf_counter() - t0

    t0 = time.perf_counter()
    for i in range(N2): 
r1_calc_dataset_item(r1_cache, i) + t_mid_i = time.perf_counter() - t0 + + t0 = time.perf_counter() + for i in range(N2): r2_calc_dataset_item(r2_cache, i) + t_r2_i = time.perf_counter() - t0 + + print(f" old {t_old_i:.3f}s {t_old_i/N2*1000:.2f}ms/call") + print(f" R1 {t_mid_i:.3f}s {t_mid_i/N2*1000:.2f}ms/call old/R1={t_old_i/t_mid_i:.2f}x") + print(f" R2 {t_r2_i:.3f}s {t_r2_i/N2*1000:.2f}ms/call old/R2={t_old_i/t_r2_i:.2f}x", end="") + if _has_cython: + t0 = time.perf_counter() + for i in range(N2): r3_calc_dataset_item(r2_cache, i) + t_r3_i = time.perf_counter() - t0 + print(f"\n R3 {t_r3_i:.3f}s {t_r3_i/N2*1000:.2f}ms/call old/R3={t_old_i/t_r3_i:.2f}x R2/R3={t_r2_i/t_r3_i:.1f}x", end="") + else: + print("\n R3 (skipped — Cython extension not built)", end="") + if _has_r4: + t0 = time.perf_counter() + for i in range(N2): r4_calc_dataset_item(r2_cache, i) + t_r4_i = time.perf_counter() - t0 + print(f"\n R4 {t_r4_i:.3f}s {t_r4_i/N2*1000:.2f}ms/call old/R4={t_old_i/t_r4_i:.2f}x R3/R4={t_r3_i/t_r4_i:.1f}x") + else: + print("\n R4 (skipped — Cython R4 not built)") + + # ---- hashimoto_light benchmark ---- + N = 30 + print(f"\nhashimoto_light x{N} calls (cache=1KB, dataset=32KB)") + + for _ in range(2): + old_hashimoto_light(FULL_SIZE, old_cache, HEADER, NONCE) + r1_hashimoto_light(FULL_SIZE, r1_cache, HEADER, NONCE) + r2_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE) + + t0 = time.perf_counter() + for i in range(N): old_hashimoto_light(FULL_SIZE, old_cache, HEADER, i.to_bytes(8, "big")) + t_old = time.perf_counter() - t0 + + t0 = time.perf_counter() + for i in range(N): r1_hashimoto_light(FULL_SIZE, r1_cache, HEADER, i.to_bytes(8, "big")) + t_mid = time.perf_counter() - t0 + + # R2: pure Python hashimoto_light (always the _slow variant) + for _ in range(2): + r2_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE) + t0 = time.perf_counter() + for i in range(N): r2_hashimoto_light(FULL_SIZE, r2_cache, HEADER, i.to_bytes(8, "big")) + t_r2 = time.perf_counter() - t0 
+ + print(f" old {t_old:.3f}s {t_old/N*1000:.1f}ms/call") + print(f" R1 {t_mid:.3f}s {t_mid/N*1000:.1f}ms/call old/R1={t_old/t_mid:.2f}x") + print(f" R2 {t_r2:.3f}s {t_r2/N*1000:.1f}ms/call old/R2={t_old/t_r2:.2f}x", end="") + if _has_cython: + # R3: hashimoto_light with Cython mix_parents + Python sha3 + for _ in range(2): + r3_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE) + t0 = time.perf_counter() + for i in range(N): r3_hashimoto_light(FULL_SIZE, r2_cache, HEADER, i.to_bytes(8, "big")) + t_r3 = time.perf_counter() - t0 + print(f"\n R3 {t_r3:.3f}s {t_r3/N*1000:.1f}ms/call old/R3={t_old/t_r3:.2f}x R2/R3={t_r2/t_r3:.1f}x", end="") + else: + print("\n R3 (skipped — Cython extension not built)", end="") + if _has_r4: + # R4: full Cython + C keccak + for _ in range(2): + r4_hashimoto_light(FULL_SIZE, r2_cache, HEADER, NONCE) + t0 = time.perf_counter() + for i in range(N): r4_hashimoto_light(FULL_SIZE, r2_cache, HEADER, i.to_bytes(8, "big")) + t_r4 = time.perf_counter() - t0 + print(f"\n R4 {t_r4:.3f}s {t_r4/N*1000:.1f}ms/call old/R4={t_old/t_r4:.2f}x R3/R4={t_r3/t_r4:.1f}x") + else: + print("\n R4 (skipped — Cython R4 not built)") + + # ---- primitive micro-benchmarks (old vs R1 vs R2) ---- + NM = 200_000 + hash_list_16 = [i * 1000003 & 0xFFFFFFFF for i in range(16)] + hash_list_8 = [i * 1000003 & 0xFFFFFFFF for i in range(8)] + hash_bytes_64 = _old_serialize_hash(hash_list_16) + hash_bytes_32 = _old_serialize_hash(hash_list_8) + + def _r2_sha3_512_list(x): + if isinstance(x, list): + x = _FMT_16I.pack(*x) + return _r2_sha3_512(x) + + print(f"\nprimitive micro-benchmarks x{NM:,} rounds") + print(f"{'Function':<30} {'Old (s)':>10} {'R1 (s)':>10} {'R2 (s)':>10} {'old/R1':>8} {'old/R2':>8}") + print("-" * 82) + _row_partial("serialize_hash (16 ints)", + [(_old_serialize_hash, (hash_list_16,)), + (_r1_serialize, (hash_list_16,)), + (None, None)], NM) + _row_partial("serialize_hash (8 ints)", + [(_old_serialize_hash, (hash_list_8,)), + (_r1_serialize, 
(hash_list_8,)),
                  (None, None)], NM)
    _row_partial("deserialize_hash (64B)",
                 [(_old_deserialize_hash, (hash_bytes_64,)),
                  (_r1_deserialize, (hash_bytes_64,)),
                  (None, None)], NM)
    _row_partial("deserialize_hash (32B)",
                 [(_old_deserialize_hash, (hash_bytes_32,)),
                  (_r1_deserialize, (hash_bytes_32,)),
                  (None, None)], NM)
    _row_partial("fnv",
                 [(_old_fnv, (0xDEADBEEF, 0xCAFEBABE)),
                  (_r1_fnv, (0xDEADBEEF, 0xCAFEBABE)),
                  (None, None)], NM)
    _row3("ethash_sha3_512 (bytes)",
          [(_old_sha3_512, (hash_bytes_64,)),
           (_r1_sha3_512, (hash_bytes_64,)),
           (_r2_sha3_512, (hash_bytes_64,))], NM)
    _row3("ethash_sha3_512 (list)",
          [(_old_sha3_512, (hash_list_16,)),
           (_r1_sha3_512, (hash_list_16,)),
           (_r2_sha3_512_list, (hash_list_16,))], NM)

    # ---- check_pow end-to-end ----
    print("\ncheck_pow end-to-end (is_test=True)")
    from ethereum.pow.ethpow import check_pow
    _cp_header = b"\xca/\xf0l\xaa\xe7\xc9M\xc9h\xbe}v\xd0\xfb\xf6\r\xd2\xe1\x98\x9e\xe9\xbf\rY1\xe4\x85d\xd5\x14;"
    _cp_nonce = (44).to_bytes(8, byteorder="big")
    _cp_mix = bytes.fromhex("5dd318d2dff0aac95a3af5617db0bfb07eee8b0ab4a42f01d6161336be758106")
    N3 = 20
    # cache_clear() before each call defeats check_pow's lru-style cache so
    # every iteration pays the full verification cost.
    check_pow.cache_clear()
    check_pow(1, _cp_header, _cp_mix, _cp_nonce, 100, is_test=True)
    check_pow.cache_clear()
    t0 = time.perf_counter()
    for _ in range(N3):
        check_pow.cache_clear()
        check_pow(1, _cp_header, _cp_mix, _cp_nonce, 100, is_test=True)
    t_cp = time.perf_counter() - t0
    print(f" x{N3}: {t_cp:.4f}s ({t_cp/N3*1000:.1f}ms/call)")
diff --git a/ethereum/pow/tests/old_ethash.py b/ethereum/pow/tests/old_ethash.py
new file mode 100644
index 000000000..b77cb464b
--- /dev/null
+++ b/ethereum/pow/tests/old_ethash.py
@@ -0,0 +1,101 @@
"""
Original hex-based ethash implementation, preserved as a reference baseline
for tests and benchmarks.
"""

import copy

from eth_utils import encode_hex, decode_hex
from Crypto.Hash import keccak

# Ethash algorithm parameters.
WORD_BYTES = 4
HASH_BYTES = 64
MIX_BYTES = 128
ACCESSES = 64
DATASET_PARENTS = 256
CACHE_ROUNDS = 3
FNV_PRIME = 0x01000193


# Raw Keccak digests (ethash's "sha3" is pre-NIST Keccak, hence the keccak module).
def _sha3_256_raw(x): return keccak.new(digest_bits=256, data=x).digest()
def _sha3_512_raw(x): return keccak.new(digest_bits=512, data=x).digest()


def decode_int(s):
    # Little-endian bytes -> int via a hex round-trip (the "hex-based" style
    # this baseline preserves); empty input decodes to 0.
    return int(encode_hex(s[::-1]), 16) if s else 0


def encode_int(s):
    # int -> little-endian bytes; 0 encodes as b"".
    a = "%x" % s
    return b"" if s == 0 else decode_hex("0" * (len(a) % 2) + a)[::-1]


def serialize_hash(h):
    # Word list -> bytes, each word padded to 4 little-endian bytes.
    return b"".join([encode_int(x).ljust(4, b"\x00") for x in h])


def deserialize_hash(h):
    # Bytes -> list of uint32 words (little-endian).
    return [decode_int(h[i:i + WORD_BYTES]) for i in range(0, len(h), WORD_BYTES)]


def sha3_512(x):
    # keccak-512 over bytes or a word list; returns 16 words.
    if isinstance(x, list):
        x = serialize_hash(x)
    return deserialize_hash(_sha3_512_raw(x))


def sha3_256(x):
    # keccak-256; returns 8 words.
    if isinstance(x, list):
        x = serialize_hash(x)
    return deserialize_hash(_sha3_256_raw(x))


def fnv(v1, v2):
    # FNV-style mix, wrapped to 32 bits.
    return (v1 * FNV_PRIME ^ v2) % 2 ** 32


def mkcache(cache_size, seed):
    # Sequential keccak-512 chain over the seed, then CACHE_ROUNDS passes of
    # xor-and-rehash over pseudo-random row pairs.
    n = cache_size // HASH_BYTES
    o = [sha3_512(seed)]
    for i in range(1, n):
        o.append(sha3_512(o[-1]))
    for _ in range(CACHE_ROUNDS):
        for i in range(n):
            v = o[i][0] % n
            o[i] = sha3_512([a ^ b for a, b in zip(o[(i - 1 + n) % n], o[v])])
    return o


def calc_dataset_item(cache, i):
    # Dataset row i: seed from cache row i, fold in DATASET_PARENTS parent
    # rows chosen by FNV, then re-hash.
    n = len(cache)
    r = HASH_BYTES // WORD_BYTES
    mix = copy.copy(cache[i % n])
    mix[0] ^= i
    mix = sha3_512(mix)
    for j in range(DATASET_PARENTS):
        cache_index = fnv(i ^ j, mix[j % r])
        mix = list(map(fnv, mix, cache[cache_index % n]))
    return sha3_512(mix)


def hashimoto_light(full_size, cache, header, nonce):
    # Full hashimoto loop with dataset rows derived on demand from the cache.
    n = full_size // HASH_BYTES
    w = MIX_BYTES // WORD_BYTES
    mixhashes = MIX_BYTES // HASH_BYTES
    s = sha3_512(header + nonce[::-1])
    mix = []
    for _ in range(mixhashes):
        mix.extend(s)
    for i in range(ACCESSES):
        p = fnv(i ^ s[0], mix[i % w]) % (n // mixhashes) * 
mixhashes + newdata = [] + for j in range(mixhashes): + newdata.extend(calc_dataset_item(cache, p + j)) + mix = list(map(fnv, mix, newdata)) + cmix = [] + for i in range(0, len(mix), 4): + cmix.append(fnv(fnv(fnv(mix[i], mix[i+1]), mix[i+2]), mix[i+3])) + return { + b"mix digest": serialize_hash(cmix), + b"result": serialize_hash(sha3_256(s + cmix)), + } diff --git a/ethereum/pow/tests/test_ethash.py b/ethereum/pow/tests/test_ethash.py index d81c3bb69..e3a64dfd2 100644 --- a/ethereum/pow/tests/test_ethash.py +++ b/ethereum/pow/tests/test_ethash.py @@ -1,10 +1,23 @@ import unittest +import numpy as np + from ethereum.pow.ethash import mkcache, calc_dataset, hashimoto_light, hashimoto_full -from ethereum.pow.ethash_utils import EPOCH_LENGTH, HASH_BYTES, serialize_hash +from ethereum.pow.ethash_utils import EPOCH_LENGTH, HASH_BYTES from ethereum.pow.ethpow import EthashMiner, check_pow +class TestEthashUtils(unittest.TestCase): + """Test correctness of ethash_utils functions.""" + + def test_ethash_sha3_512_known_vector(self): + """ethash_sha3_512 with seed zero is stable across runs.""" + from ethereum.pow.ethash_utils import ethash_sha3_512 + seed = b"\x00" * 32 + result = ethash_sha3_512(seed) + self.assertEqual(ethash_sha3_512(seed).tobytes(), result.tobytes()) + + class TestEthash(unittest.TestCase): """Same test cases in go-ethereum.""" @@ -28,7 +41,7 @@ def test_cache_generation(self): for cache_size, epoch, expected_cache in testcases: block_number = epoch * EPOCH_LENGTH cache = mkcache(cache_size, block_number) - cache_hex = "".join(serialize_hash(ls).hex() for ls in cache) + cache_hex = "".join(row.tobytes().hex() for row in cache) self.assertEqual(cache_hex, expected_cache[2:]) def test_dataset_gen(self): @@ -45,7 +58,7 @@ def test_dataset_gen(self): block_number = epoch * EPOCH_LENGTH cache = mkcache(cache_size, block_number) dataset = calc_dataset(dataset_size, cache) - dataset_hex = "".join(serialize_hash(ls).hex() for ls in dataset) + dataset_hex = 
"".join(row.tobytes().hex() for row in dataset) self.assertEqual(dataset_hex, expected_dataset[2:]) def test_hashimoto(self): @@ -117,6 +130,44 @@ def test_ethash_mining(self): ) self.assertTrue(validity) + def test_cython_matches_python_fallback(self): + """numpy and Cython implementations both match the original hex-based baseline.""" + try: + from ethereum.pow.ethash_cy import cy_calc_dataset_item, cy_hashimoto_light + except ImportError: + self.skipTest("Cython extension not built") + + from ethereum.pow.ethash import calc_dataset_item, hashimoto + from ethereum.pow.tests import old_ethash + + old_cache = old_ethash.mkcache(1024, b"\x00" * 32) + new_cache = mkcache(1024, 0) + + # calc_dataset_item and cy_calc_dataset_item vs old baseline + for i in range(16): + baseline = old_ethash.serialize_hash(old_ethash.calc_dataset_item(old_cache, i)) + self.assertEqual( + calc_dataset_item(new_cache, i).tobytes(), baseline, + f"calc_dataset_item mismatch vs old at item {i}", + ) + self.assertEqual( + cy_calc_dataset_item(new_cache, i).tobytes(), baseline, + f"cy_calc_dataset_item mismatch vs old at item {i}", + ) + + # hashimoto_light: Python hashimoto vs Cython cy_hashimoto_light + header = bytes(32) + nonce = (0).to_bytes(8, byteorder="big") + full_size = 32 * 1024 + py_r = hashimoto(header, nonce, full_size, lambda x: calc_dataset_item(new_cache, x)) + cy_r = cy_hashimoto_light( + full_size, new_cache, + np.frombuffer(header, dtype=np.uint8), + np.frombuffer(nonce, dtype=np.uint8), + ) + self.assertEqual(py_r[b"mix digest"], cy_r[b"mix digest"]) + self.assertEqual(py_r[b"result"], cy_r[b"result"]) + def test_pyethash(self): header_hash = b"\xca/\xf0l\xaa\xe7\xc9M\xc9h\xbe}v\xd0\xfb\xf6\r\xd2\xe1\x98\x9e\xe9\xbf\rY1\xe4\x85d\xd5\x14;" for diff, expected_nonce in ((100, 34), (500, 78)): diff --git a/setup.py b/setup.py index 314502a2f..ae0742f67 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,31 @@ import os -from setuptools import setup +from setuptools import setup, 
Extension from setuptools.command.develop import develop +# Optional Cython extension: native inner loop for ethash calc_dataset_item. +# Built only if both Cython and numpy are importable at setup time. The +# Python implementation in ethereum/pow/ethash.py falls back transparently +# when the compiled module is not present. +ext_modules = [] +try: + from Cython.Build import cythonize + import numpy as _np + + ext_modules = cythonize( + [ + Extension( + "ethereum.pow.ethash_cy", + sources=["ethereum/pow/ethash_cy.pyx", "ethereum/pow/keccak_tiny.c"], + include_dirs=[_np.get_include(), "ethereum/pow"], + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + ) + ], + language_level=3, + ) +except ImportError: + pass + install_requires = set(x.strip() for x in open("requirements.txt")) install_requires_replacements = {} install_requires = [install_requires_replacements.get(r, r) for r in install_requires] @@ -46,4 +69,5 @@ def read(fname): install_requires=install_requires, python_requires=">=3.5", cmdclass={"develop": custom_develop}, + ext_modules=ext_modules, )