From 7a7522ecaa50824613a7d389e4202dc0f730a876 Mon Sep 17 00:00:00 2001 From: magnum Date: Tue, 14 Jun 2022 13:23:02 +0200 Subject: [PATCH] Astra Linux crypt variants using GOST R 34.11-94 or GOST R 34.11-2012 The former is 256-bit. The latter is 256 or 512-bit and also known as Streebog. The formats exactly match Drepper's sha256crypt and sha512crypt but with the hash primitive replaced with one of the GOST alternatives. gost94crypt ($gost94hash$) streebog256crypt ($gost12256hash$) streebog512crypt ($gost12512hash$) --- run/opencl/gost12256hash_kernel.cl | 302 +++++++++ run/opencl/gost12512hash_kernel.cl | 306 +++++++++ run/opencl/gost94hash_kernel.cl | 292 +++++++++ run/opencl/opencl_gost94.h | 585 +++++++++++++++++ run/opencl/opencl_streebog.h | 971 ++++++++++++++++++++++++++++ src/gost12256hash_fmt_plug.c | 441 +++++++++++++ src/gost12512hash_fmt_plug.c | 442 +++++++++++++ src/gost94hash_fmt_plug.c | 443 +++++++++++++ src/opencl_common.c | 6 +- src/opencl_gost12256hash_fmt_plug.c | 492 ++++++++++++++ src/opencl_gost12512hash_fmt_plug.c | 491 ++++++++++++++ src/opencl_gost94hash_fmt_plug.c | 488 ++++++++++++++ 12 files changed, 5258 insertions(+), 1 deletion(-) create mode 100644 run/opencl/gost12256hash_kernel.cl create mode 100644 run/opencl/gost12512hash_kernel.cl create mode 100644 run/opencl/gost94hash_kernel.cl create mode 100644 run/opencl/opencl_gost94.h create mode 100644 run/opencl/opencl_streebog.h create mode 100644 src/gost12256hash_fmt_plug.c create mode 100644 src/gost12512hash_fmt_plug.c create mode 100644 src/gost94hash_fmt_plug.c create mode 100644 src/opencl_gost12256hash_fmt_plug.c create mode 100644 src/opencl_gost12512hash_fmt_plug.c create mode 100644 src/opencl_gost94hash_fmt_plug.c diff --git a/run/opencl/gost12256hash_kernel.cl b/run/opencl/gost12256hash_kernel.cl new file mode 100644 index 0000000000..290f36ca43 --- /dev/null +++ b/run/opencl/gost12256hash_kernel.cl @@ -0,0 +1,302 @@ +/* + * This software is Copyright 2022 magnum + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#include "opencl_misc.h" + +#define STREEBOG256CRYPT 1 +#define STREEBOG_LOCAL_AX 1 +#define STREEBOG_VECTOR 1 +#define STREEBOG_UNROLL 0 +#define STREEBOG_MANUAL_UNROLL 0 +#include "opencl_streebog.h" + +#define SALT_LENGTH 16 +#define BINARY_SIZE 32 + +typedef struct { + uint len; + uchar key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + uint rounds; + uint len; + uchar salt[SALT_LENGTH]; +} saltstruct; + +typedef struct { + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; +} statebuf; + +__kernel void gost12256init(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint256_u *out) +{ + GOST34112012Context ctx; + GOST34112012Context alt_ctx; + uint256_u result; + uint256_u temp_result; + uint gid = get_global_id(0); + uint cnt; + uint len = in[gid].len; + uint saltlen = ssalt->len; + uchar *cp; + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uchar key[PLAINTEXT_LENGTH]; + uchar salt[SALT_LENGTH]; + +#if STREEBOG_LOCAL_AX + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + /* Copy to private memory */ + memcpy_gp(key, in[gid].key, len); + memcpy_mcp(salt, ssalt->salt, saltlen); + + /* Prepare for the real work. */ + GOST34112012Init(&ctx, 256); + + /* Add the key string. */ + GOST34112012Update(&ctx, key, len, loc_buf); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + GOST34112012Update(&ctx, salt, saltlen, loc_buf); + + + /* Compute alternate Streebog sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + GOST34112012Init(&alt_ctx, 256); + + /* Add key. */ + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Add salt. */ + GOST34112012Update(&alt_ctx, salt, saltlen, loc_buf); + + /* Add key again. */ + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Now get result of this (32 bytes) and add it to the other + context. */ + GOST34112012Final(&alt_ctx, &result, loc_buf); + + /* Add for any character in the key one byte of the alternate sum. */ +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); +#else + cnt = len; +#endif + GOST34112012Update(&ctx, result.BYTES, cnt, loc_buf); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = len; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, key, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + + /* Start computation of P byte sequence. */ + GOST34112012Init(&alt_ctx, 256); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < len; ++cnt) + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, &temp_result, loc_buf); + + /* Create byte sequence P. */ + cp = p_bytes; +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, &temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = len; +#endif + memcpy_pp(cp, &temp_result, cnt); + + /* Start computation of S byte sequence. */ + GOST34112012Init(&alt_ctx, 256); + + /* repeat the following 16+A[0] times, where A[0] represents the + first byte in digest A interpreted as an 8-bit uvalue */ + for (cnt = 0; cnt < 16 + result.BYTES[0]; ++cnt) + GOST34112012Update(&alt_ctx, salt, saltlen, loc_buf); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, &temp_result, loc_buf); + + /* Create byte sequence S. */ + cp = s_bytes; +#if SALT_LENGTH > BINARY_SIZE + for (cnt = saltlen; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, &temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = saltlen; +#endif + memcpy_pp(cp, &temp_result, cnt); + + /* Here's everything we need for the loop kernel */ + memcpy256(&(out[gid]), &(result)); + memcpy_pg(state[gid].p_bytes, p_bytes, len); + memcpy_pg(state[gid].s_bytes, s_bytes, saltlen); +} + +__kernel void gost12256loop(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint256_u *out) +{ + GOST34112012Context ctx; + uint256_u result; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + + uint cnt; + uint saltlen = ssalt->len; + uint len = in[gid].len; + + memcpy256(&(result), &(out[gid])); + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + +#if STREEBOG_LOCAL_AX + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + /* Repeatedly run the collected hash value through Streebog to burn CPU cycles. */ +#pragma unroll HASH_LOOPS + for (cnt = 0; cnt < HASH_LOOPS; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 256); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + else + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + GOST34112012Update(&ctx, s_bytes, saltlen, loc_buf); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + } + + memcpy256(&(out[gid]), &(result)); +} + +__kernel void gost12256final(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint256_u *out) +{ + GOST34112012Context ctx; + uint256_u result; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uint saltlen, len; + uint cnt; + + uint rounds = ssalt->rounds % HASH_LOOPS; + + memcpy256(&(result), &(out[gid])); + +#if STREEBOG_LOCAL_AX + if (rounds) { + saltlen = ssalt->len; + len = in[gid].len; + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + } +#endif + + /* Repeatedly run the collected hash value through Streebog to burn CPU cycles. */ + for (cnt = 0; cnt < rounds; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 256); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + else + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + GOST34112012Update(&ctx, s_bytes, saltlen, loc_buf); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + } + + memcpy256(&(out[gid]), &(result)); +} diff --git a/run/opencl/gost12512hash_kernel.cl b/run/opencl/gost12512hash_kernel.cl new file mode 100644 index 0000000000..e4065a6df6 --- /dev/null +++ b/run/opencl/gost12512hash_kernel.cl @@ -0,0 +1,306 @@ +/* + * This software is Copyright 2022 magnum + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#include "opencl_misc.h" +#define STREEBOG512CRYPT 1 +#define STREEBOG_LOCAL_AX 1 +#define STREEBOG_VECTOR 1 +/* + * Without unroll: Build time: 45.125 s, binary size 2542391, 10402 c/s + * Pragma unroll: Build time: 21 min 14.788 s, binary size 11502092, 10051 c/s + * Manual unroll: Build time: 4 min 26.142 s, binary size 5961490, 9959 c/s + */ +#define STREEBOG_UNROLL 0 +#define STREEBOG_MANUAL_UNROLL 0 +#include "opencl_streebog.h" + +#define SALT_LENGTH 16 +#define BINARY_SIZE 64 + +typedef struct { + uint len; + uchar key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + uint rounds; + uint len; + uchar salt[SALT_LENGTH]; +} saltstruct; + +typedef struct { + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; +} statebuf; + +__kernel void gost12512init(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint512_u *out) +{ + GOST34112012Context ctx; + GOST34112012Context alt_ctx; + uint512_u result; + uint512_u temp_result; + uint gid = get_global_id(0); + uint cnt; + uint len = in[gid].len; + uint saltlen = ssalt->len; + uchar *cp; + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uchar key[PLAINTEXT_LENGTH]; + uchar salt[SALT_LENGTH]; + +#if STREEBOG_LOCAL_AX + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + /* Copy to private memory */ + memcpy_gp(key, in[gid].key, len); + memcpy_mcp(salt, ssalt->salt, saltlen); + + /* Prepare for the real work. */ + GOST34112012Init(&ctx, 512); + + /* Add the key string. */ + GOST34112012Update(&ctx, key, len, loc_buf); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + GOST34112012Update(&ctx, salt, saltlen, loc_buf); + + + /* Compute alternate Streebog sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + GOST34112012Init(&alt_ctx, 512); + + /* Add key. */ + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Add salt. */ + GOST34112012Update(&alt_ctx, salt, saltlen, loc_buf); + + /* Add key again. */ + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Now get result of this (64 bytes) and add it to the other + context. */ + GOST34112012Final(&alt_ctx, &result, loc_buf); + + /* Add for any character in the key one byte of the alternate sum. */ +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); +#else + cnt = len; +#endif + GOST34112012Update(&ctx, result.BYTES, cnt, loc_buf); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = len; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, key, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + + /* Start computation of P byte sequence. */ + GOST34112012Init(&alt_ctx, 512); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < len; ++cnt) + GOST34112012Update(&alt_ctx, key, len, loc_buf); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, &temp_result, loc_buf); + + /* Create byte sequence P. */ + cp = p_bytes; +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, &temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = len; +#endif + memcpy_pp(cp, &temp_result, cnt); + + /* Start computation of S byte sequence. */ + GOST34112012Init(&alt_ctx, 512); + + /* repeat the following 16+A[0] times, where A[0] represents the + first byte in digest A interpreted as an 8-bit uvalue */ + for (cnt = 0; cnt < 16 + result.BYTES[0]; ++cnt) + GOST34112012Update(&alt_ctx, salt, saltlen, loc_buf); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, &temp_result, loc_buf); + + /* Create byte sequence S. */ + cp = s_bytes; +#if SALT_LENGTH > BINARY_SIZE + for (cnt = saltlen; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, &temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = saltlen; +#endif + memcpy_pp(cp, &temp_result, cnt); + + /* Here's everything we need for the loop kernel */ + memcpy512(&(out[gid]), &(result)); + memcpy_pg(state[gid].p_bytes, p_bytes, len); + memcpy_pg(state[gid].s_bytes, s_bytes, saltlen); +} + +__kernel void gost12512loop(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint512_u *out) +{ + GOST34112012Context ctx; + uint512_u result; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + + uint cnt; + uint saltlen = ssalt->len; + uint len = in[gid].len; + + memcpy512(&(result), &(out[gid])); + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + +#if STREEBOG_LOCAL_AX + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + /* Repeatedly run the collected hash value through Streebog to burn CPU cycles. */ +#pragma unroll HASH_LOOPS + for (cnt = 0; cnt < HASH_LOOPS; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 512); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + else + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + GOST34112012Update(&ctx, s_bytes, saltlen, loc_buf); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + } + + memcpy512(&(out[gid]), &(result)); +} + +__kernel void gost12512final(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local localbuf *loc_buf, + __global uint512_u *out) +{ + GOST34112012Context ctx; + uint512_u result; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uint saltlen, len; + uint cnt; + + uint rounds = ssalt->rounds % HASH_LOOPS; + + memcpy512(&(result), &(out[gid])); + +#if STREEBOG_LOCAL_AX + if (rounds) { + saltlen = ssalt->len; + len = in[gid].len; + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + + uint ls = get_local_size(0); + uint lid = get_local_id(0); + + for (uint i = lid; i < 256; i += ls) { + for (uint j = 0; j < 8; j++) + loc_buf->Ax[j][i] = Ax[j][i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + } +#endif + + /* Repeatedly run the collected hash value through Streebog to burn CPU cycles. */ + for (cnt = 0; cnt < rounds; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 512); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + else + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + GOST34112012Update(&ctx, s_bytes, saltlen, loc_buf); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Add key or last result. */ + if (cnt & 1) + GOST34112012Update(&ctx, result.BYTES, BINARY_SIZE, loc_buf); + else + GOST34112012Update(&ctx, p_bytes, len, loc_buf); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, &result, loc_buf); + } + + memcpy512(&(out[gid]), &(result)); +} diff --git a/run/opencl/gost94hash_kernel.cl b/run/opencl/gost94hash_kernel.cl new file mode 100644 index 0000000000..9cca09b10f --- /dev/null +++ b/run/opencl/gost94hash_kernel.cl @@ -0,0 +1,292 @@ +/* + * This software is Copyright 2022 magnum + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#include "opencl_misc.h" + +#define GOST94_USE_LOCAL 1 +#define GOST94_FLAT_INIT 1 +#include "opencl_gost94.h" + +#define SALT_LENGTH 16 +#define BINARY_SIZE 32 + +typedef struct { + uint len; + uchar key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + uint rounds; + uint len; + uchar salt[SALT_LENGTH]; +} saltstruct; + +typedef struct { + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; +} statebuf; + +typedef struct { + uint v[BINARY_SIZE / sizeof(uint)]; +} outbuf; + +__kernel void gost94init(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local rhash_gost94_sbox *sbox_ptr, + __global outbuf *out) +{ + gost94_ctx ctx; + gost94_ctx alt_ctx; + uchar result[BINARY_SIZE]; + uchar temp_result[BINARY_SIZE]; + uint gid = get_global_id(0); + uint cnt; + uint len = in[gid].len; + uint saltlen = ssalt->len; + uchar *cp; + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uchar key[PLAINTEXT_LENGTH]; + uchar salt[SALT_LENGTH]; +#if !GOST94_USE_LOCAL + rhash_gost94_sbox sbox_buf; +#define sbox &sbox_buf +#else +#define sbox sbox_ptr +#endif + + gost94_init_table(sbox); + + /* Copy to private memory */ + memcpy_gp(key, in[gid].key, len); + memcpy_mcp(salt, ssalt->salt, saltlen); + + /* Prepare for the real work. */ + gost94_init(&ctx); + + /* Add the key string. */ + gost94_update(&ctx, key, len, sbox); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + gost94_update(&ctx, salt, saltlen, sbox); + + + /* Compute alternate GOST94 sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + gost94_init(&alt_ctx); + + /* Add key. */ + gost94_update(&alt_ctx, key, len, sbox); + + /* Add salt. */ + gost94_update(&alt_ctx, salt, saltlen, sbox); + + /* Add key again. */ + gost94_update(&alt_ctx, key, len, sbox); + + /* Now get result of this (32 bytes) and add it to the other + context. */ + gost94_final(&alt_ctx, result, sbox); + + /* Add for any character in the key one byte of the alternate sum. */ +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + gost94_update(&ctx, result, BINARY_SIZE, sbox); +#else + cnt = len; +#endif + gost94_update(&ctx, result, cnt, sbox); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = len; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + gost94_update(&ctx, result, BINARY_SIZE, sbox); + else + gost94_update(&ctx, key, len, sbox); + + /* Create intermediate result. */ + gost94_final(&ctx, result, sbox); + + /* Start computation of P byte sequence. */ + gost94_init(&alt_ctx); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < len; ++cnt) + gost94_update(&alt_ctx, key, len, sbox); + + /* Finish the digest. */ + gost94_final(&alt_ctx, temp_result, sbox); + + /* Create byte sequence P. */ + cp = p_bytes; +#if PLAINTEXT_LENGTH > BINARY_SIZE + for (cnt = len; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = len; +#endif + memcpy_pp(cp, temp_result, cnt); + + /* Start computation of S byte sequence. */ + gost94_init(&alt_ctx); + + /* repeat the following 16+A[0] times, where A[0] represents the + first byte in digest A interpreted as an 8-bit uvalue */ + for (cnt = 0; cnt < 16 + result[0]; ++cnt) + gost94_update(&alt_ctx, salt, saltlen, sbox); + + /* Finish the digest. */ + gost94_final(&alt_ctx, temp_result, sbox); + + /* Create byte sequence S. */ + cp = s_bytes; +#if SALT_LENGTH > BINARY_SIZE + for (cnt = saltlen; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) { + memcpy_pp(cp, temp_result, BINARY_SIZE); + cp += BINARY_SIZE; + } +#else + cnt = saltlen; +#endif + memcpy_pp(cp, temp_result, cnt); + + /* Here's everything we need for the loop kernel */ + memcpy_pg(out[gid].v, result, BINARY_SIZE); + memcpy_pg(state[gid].p_bytes, p_bytes, len); + memcpy_pg(state[gid].s_bytes, s_bytes, saltlen); +} + +__kernel void gost94loop(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local rhash_gost94_sbox *sbox_ptr, + __global outbuf *out) +{ + gost94_ctx ctx; + uchar result[BINARY_SIZE]; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; +#if !GOST94_USE_LOCAL + rhash_gost94_sbox sbox_buf; +#define sbox &sbox_buf +#else +#define sbox sbox_ptr +#endif + + gost94_init_table(sbox); + + uint cnt; + uint saltlen = ssalt->len; + uint len = in[gid].len; + + memcpy_gp(result, out[gid].v, BINARY_SIZE); + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + + /* Repeatedly run the collected hash value through GOST94 to burn CPU cycles. */ +#pragma unroll HASH_LOOPS + for (cnt = 0; cnt < HASH_LOOPS; ++cnt) { + /* New context. */ + gost94_init(&ctx); + + /* Add key or last result. */ + if (cnt & 1) + gost94_update(&ctx, p_bytes, len, sbox); + else + gost94_update(&ctx, result, BINARY_SIZE, sbox); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + gost94_update(&ctx, s_bytes, saltlen, sbox); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + gost94_update(&ctx, p_bytes, len, sbox); + + /* Add key or last result. */ + if (cnt & 1) + gost94_update(&ctx, result, BINARY_SIZE, sbox); + else + gost94_update(&ctx, p_bytes, len, sbox); + + /* Create intermediate result. */ + gost94_final(&ctx, result, sbox); + } + + memcpy_pg(out[gid].v, result, BINARY_SIZE); +} + +__kernel void gost94final(__global inbuf *in, + MAYBE_CONSTANT saltstruct *ssalt, + __global statebuf *state, + __local rhash_gost94_sbox *sbox_ptr, + __global outbuf *out) +{ + gost94_ctx ctx; + uchar result[BINARY_SIZE]; + uint gid = get_global_id(0); + uchar p_bytes[PLAINTEXT_LENGTH]; + uchar s_bytes[SALT_LENGTH]; + uint saltlen, len; + uint cnt; + uint rounds = ssalt->rounds % HASH_LOOPS; +#if !GOST94_USE_LOCAL + rhash_gost94_sbox sbox_buf; +#define sbox &sbox_buf +#else +#define sbox sbox_ptr +#endif + + memcpy_gp(result, out[gid].v, BINARY_SIZE); + + if (rounds) { + gost94_init_table(sbox); + saltlen = ssalt->len; + len = in[gid].len; + memcpy_gp(p_bytes, state[gid].p_bytes, len); + memcpy_gp(s_bytes, state[gid].s_bytes, saltlen); + } + + /* Repeatedly run the collected hash value through GOST94 to burn CPU cycles. */ + for (cnt = 0; cnt < rounds; ++cnt) { + /* New context. */ + gost94_init(&ctx); + + /* Add key or last result. */ + if (cnt & 1) + gost94_update(&ctx, p_bytes, len, sbox); + else + gost94_update(&ctx, result, BINARY_SIZE, sbox); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3) + gost94_update(&ctx, s_bytes, saltlen, sbox); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7) + gost94_update(&ctx, p_bytes, len, sbox); + + /* Add key or last result. */ + if (cnt & 1) + gost94_update(&ctx, result, BINARY_SIZE, sbox); + else + gost94_update(&ctx, p_bytes, len, sbox); + + /* Create intermediate result. */ + gost94_final(&ctx, result, sbox); + } + + memcpy_pg(out[gid].v, result, BINARY_SIZE); +} diff --git a/run/opencl/opencl_gost94.h b/run/opencl/opencl_gost94.h new file mode 100644 index 0000000000..d72834a247 --- /dev/null +++ b/run/opencl/opencl_gost94.h @@ -0,0 +1,585 @@ +/* + * gost.c - an implementation of GOST Hash Function + * based on the Russian Standard GOST R 34.11-94. + * See also RFC 4357. + * + * Copyright: 2009 Aleksey Kravchenko + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so. + * + * Porting to OpenCL + optimizations: Copyright (c) 2022 magnum, and those changes + * hereby released to the general public under the following terms: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#ifndef GOST94_H +#define GOST94_H + +#include "opencl_misc.h" + +#if GOST94_USE_LOCAL +#define MAYBE_LOCAL __local +#else +#define MAYBE_LOCAL +#endif + +#if GOST_UNROLL +#define UNROLL8 _Pragma("unroll 8") +#define UNROLL16 _Pragma("unroll 16") +#else +#define UNROLL8 +#define UNROLL16 +#endif + +#define memcpy512(dst, src) do { \ + UNROLL16 \ + for (uint i = 0; i < 16; i++) \ + (dst)->DWORD[i] = (src)->DWORD[i]; \ + } while (0) + +#define memcpy256(dst, src) do { \ + UNROLL8 \ + for (uint i = 0; i < 8; i++) \ + (dst)->DWORD[i] = (src)->DWORD[i]; \ + } while (0) + +typedef union { + uint array[4][256]; + uint flat[4 * 256]; +} rhash_gost94_sbox; + +#define GOST94_BLOCK_SIZE 32 +#define GOST94_HASH_LENGTH 32 + +/* algorithm context */ +typedef struct { + uint hash[8]; /* algorithm 256-bit state */ + uint sum[8]; /* sum of processed message blocks */ + uchar message[GOST94_BLOCK_SIZE]; /* 256-bit buffer for leftovers */ + uint length; /* number of processed bytes */ +} gost94_ctx; + +/** + * Initialize algorithm context before calculaing hash + * with test parameters set. + * + * @param ctx context to initialize + */ +inline void gost94_init(gost94_ctx *ctx) +{ + memset_p(ctx, 0, sizeof(gost94_ctx)); +} + +/* + * A macro that performs a full encryption round of GOST 28147-89. + * Temporary variables tmp assumed and variables r and l for left and right + * blocks. + */ +#define GOST94_ENCRYPT_ROUND(key1, key2, sbox) do { \ + tmp = (key1) + r; \ + l ^= (sbox->flat)[tmp & 0xff] ^ ((sbox->flat) + 256)[(tmp >> 8) & 0xff] ^ \ + ((sbox->flat) + 512)[(tmp >> 16) & 0xff] ^ ((sbox->flat) + 768)[tmp >> 24]; \ + tmp = (key2) + l; \ + r ^= (sbox->flat)[tmp & 0xff] ^ ((sbox->flat) + 256)[(tmp >> 8) & 0xff] ^ \ + ((sbox->flat) + 512)[(tmp >> 16) & 0xff] ^ ((sbox->flat) + 768)[tmp >> 24]; \ + } while (0) + +/* encrypt a block with the given key */ +#define GOST94_ENCRYPT(result, i, key, hash, sbox) do { \ + uint l, r, tmp; \ + r = hash[i], l = hash[i + 1]; \ + GOST94_ENCRYPT_ROUND(key[0], key[1], sbox); \ + GOST94_ENCRYPT_ROUND(key[2], key[3], sbox); \ + GOST94_ENCRYPT_ROUND(key[4], key[5], sbox); \ + GOST94_ENCRYPT_ROUND(key[6], key[7], sbox); \ + GOST94_ENCRYPT_ROUND(key[0], key[1], sbox); \ + GOST94_ENCRYPT_ROUND(key[2], key[3], sbox); \ + GOST94_ENCRYPT_ROUND(key[4], key[5], sbox); \ + GOST94_ENCRYPT_ROUND(key[6], key[7], sbox); \ + GOST94_ENCRYPT_ROUND(key[0], key[1], sbox); \ + GOST94_ENCRYPT_ROUND(key[2], key[3], sbox); \ + GOST94_ENCRYPT_ROUND(key[4], key[5], sbox); \ + GOST94_ENCRYPT_ROUND(key[6], key[7], sbox); \ + GOST94_ENCRYPT_ROUND(key[7], key[6], sbox); \ + GOST94_ENCRYPT_ROUND(key[5], key[4], sbox); \ + GOST94_ENCRYPT_ROUND(key[3], key[2], sbox); \ + GOST94_ENCRYPT_ROUND(key[1], key[0], sbox); \ + result[i] = l, result[i + 1] = r; \ + } while (0) + +/** + * The core transformation. Process a 512-bit block. + * + * @param hash intermediate message hash + * @param block the message block to process + */ +static void rhash_gost94_block_compress(gost94_ctx *ctx, const uint* block, MAYBE_LOCAL const rhash_gost94_sbox *sbox) +{ + uint i; + uint key[8], u[8], v[8], w[8], s[8]; + + /* u := hash, v := <256-bit message block> */ + memcpy_pp(u, ctx->hash, sizeof(u)); + memcpy_pp(v, block, sizeof(v)); + + /* w := u xor v */ + w[0] = u[0] ^ v[0], w[1] = u[1] ^ v[1]; + w[2] = u[2] ^ v[2], w[3] = u[3] ^ v[3]; + w[4] = u[4] ^ v[4], w[5] = u[5] ^ v[5]; + w[6] = u[6] ^ v[6], w[7] = u[7] ^ v[7]; + + /* calculate keys, encrypt hash and store result to the s[] array */ + for (i = 0;; i += 2) { + /* key generation: key_i := P(w) */ + key[0] = (w[0] & 0x000000ff) | ((w[2] & 0x000000ff) << 8) | ((w[4] & 0x000000ff) << 16) | ((w[6] & 0x000000ff) << 24); + key[1] = ((w[0] & 0x0000ff00) >> 8) | (w[2] & 0x0000ff00) | ((w[4] & 0x0000ff00) << 8) | ((w[6] & 0x0000ff00) << 16); + key[2] = ((w[0] & 0x00ff0000) >> 16) | ((w[2] & 0x00ff0000) >> 8) | (w[4] & 0x00ff0000) | ((w[6] & 0x00ff0000) << 8); + key[3] = ((w[0] & 0xff000000) >> 24) | ((w[2] & 0xff000000) >> 16) | ((w[4] & 0xff000000) >> 8) | (w[6] & 0xff000000); + key[4] = (w[1] & 0x000000ff) | ((w[3] & 0x000000ff) << 8) | ((w[5] & 0x000000ff) << 16) | ((w[7] & 0x000000ff) << 24); + key[5] = ((w[1] & 0x0000ff00) >> 8) | (w[3] & 0x0000ff00) | ((w[5] & 0x0000ff00) << 8) | ((w[7] & 0x0000ff00) << 16); + key[6] = ((w[1] & 0x00ff0000) >> 16) | ((w[3] & 0x00ff0000) >> 8) | (w[5] & 0x00ff0000) | ((w[7] & 0x00ff0000) << 8); + key[7] = ((w[1] & 0xff000000) >> 24) | ((w[3] & 0xff000000) >> 16) | ((w[5] & 0xff000000) >> 8) | (w[7] & 0xff000000); + + /* encryption: s_i := E_{key_i} (h_i) */ + GOST94_ENCRYPT(s, i, key, ctx->hash, sbox); + + if (i == 0) { + /* w:= A(u) ^ A^2(v) */ + w[0] = u[2] ^ v[4], w[1] = u[3] ^ v[5]; + w[2] = u[4] ^ v[6], w[3] = u[5] ^ v[7]; + w[4] = u[6] ^ (v[0] ^= v[2]); + w[5] = u[7] ^ (v[1] ^= v[3]); + w[6] = (u[0] ^= u[2]) ^ (v[2] ^= v[4]); + w[7] = (u[1] ^= u[3]) ^ (v[3] ^= v[5]); + } else if ((i & 2) != 0) { + if (i == 6) break; + + /* w := A^2(u) xor A^4(v) xor C_3; u := A(u) xor C_3 */ + /* C_3=0xff00ffff000000ffff0000ff00ffff0000ff00ff00ff00ffff00ff00ff00ff00 */ + u[2] ^= u[4] ^ 0x000000ff; + u[3] ^= u[5] ^ 0xff00ffff; + u[4] ^= 0xff00ff00; + u[5] ^= 0xff00ff00; + u[6] ^= 0x00ff00ff; + u[7] ^= 0x00ff00ff; + u[0] ^= 0x00ffff00; + u[1] ^= 0xff0000ff; + + w[0] = u[4] ^ v[0]; + w[2] = u[6] ^ v[2]; + w[4] = u[0] ^ (v[4] ^= v[6]); + w[6] = u[2] ^ (v[6] ^= v[0]); + w[1] = u[5] ^ v[1]; + w[3] = u[7] ^ v[3]; + w[5] = u[1] ^ (v[5] ^= v[7]); + w[7] = u[3] ^ (v[7] ^= v[1]); + } else { + /* i==4 here */ + /* w:= A( A^2(u) xor C_3 ) xor A^6(v) */ + w[0] = u[6] ^ v[4], w[1] = u[7] ^ v[5]; + w[2] = u[0] ^ v[6], w[3] = u[1] ^ v[7]; + w[4] = u[2] ^ (v[0] ^= v[2]); + w[5] = u[3] ^ (v[1] ^= v[3]); + w[6] = (u[4] ^= u[6]) ^ (v[2] ^= v[4]); + w[7] = (u[5] ^= u[7]) ^ (v[3] ^= v[5]); + } + } + + /* step hash function: x(block, hash) := psi^61(hash xor psi(block xor psi^12(S))) */ + + /* 12 rounds of the LFSR and xor in */ + u[0] = block[0] ^ s[6]; + u[1] = block[1] ^ s[7]; + u[2] = block[2] ^ (s[0] << 16) ^ (s[0] >> 16) ^ (s[0] & 0xffff) ^ (s[1] & 0xffff) ^ (s[1] >> 16) ^ (s[2] << 16) ^ s[6] ^ (s[6] << 16) ^ (s[7] & 0xffff0000) ^ (s[7] >> 16); + u[3] = block[3] ^ (s[0] & 0xffff) ^ (s[0] << 16) ^ (s[1] & 0xffff) ^ (s[1] << 16) ^ (s[1] >> 16) ^ + (s[2] << 16) ^ (s[2] >> 16) ^ (s[3] << 16) ^ s[6] ^ (s[6] << 16) ^ (s[6] >> 16) ^ (s[7] & 0xffff) ^ (s[7] << 16) ^ (s[7] >> 16); + u[4] = block[4] ^ (s[0] & 0xffff0000) ^ (s[0] << 16) ^ (s[0] >> 16) ^ + (s[1] & 0xffff0000) ^ (s[1] >> 16) ^ (s[2] << 16) ^ (s[2] >> 16) ^ (s[3] << 16) ^ (s[3] >> 16) ^ (s[4] << 16) ^ (s[6] << 16) ^ (s[6] >> 16) ^ (s[7] & 0xffff) ^ (s[7] << 16) ^ (s[7] >> 16); + u[5] = block[5] ^ (s[0] << 16) ^ (s[0] >> 16) ^ (s[0] & 0xffff0000) ^ + (s[1] & 0xffff) ^ s[2] ^ (s[2] >> 16) ^ (s[3] << 16) ^ (s[3] >> 16) ^ (s[4] << 16) ^ (s[4] >> 16) ^ (s[5] << 16) ^ (s[6] << 16) ^ (s[6] >> 16) ^ (s[7] & 0xffff0000) ^ (s[7] << 16) ^ (s[7] >> 16); + u[6] = block[6] ^ s[0] ^ (s[1] >> 16) ^ (s[2] << 16) ^ s[3] ^ (s[3] >> 16) + ^ (s[4] << 16) ^ (s[4] >> 16) ^ (s[5] << 16) ^ (s[5] >> 16) ^ s[6] ^ (s[6] << 16) ^ (s[6] >> 16) ^ (s[7] << 16); + u[7] = block[7] ^ (s[0] & 0xffff0000) ^ (s[0] << 16) ^ (s[1] & 0xffff) ^ + (s[1] << 16) ^ (s[2] >> 16) ^ (s[3] << 16) ^ s[4] ^ (s[4] >> 16) ^ (s[5] << 16) ^ (s[5] >> 16) ^ (s[6] >> 16) ^ (s[7] & 0xffff) ^ (s[7] << 16) ^ (s[7] >> 16); + + /* 1 round of the LFSR (a mixing transformation) and xor with */ + v[0] = ctx->hash[0] ^ (u[1] << 16) ^ (u[0] >> 16); + v[1] = ctx->hash[1] ^ (u[2] << 16) ^ (u[1] >> 16); + v[2] = ctx->hash[2] ^ (u[3] << 16) ^ (u[2] >> 16); + v[3] = ctx->hash[3] ^ (u[4] << 16) ^ (u[3] >> 16); + v[4] = ctx->hash[4] ^ (u[5] << 16) ^ (u[4] >> 16); + v[5] = ctx->hash[5] ^ (u[6] << 16) ^ (u[5] >> 16); + v[6] = ctx->hash[6] ^ (u[7] << 16) ^ (u[6] >> 16); + v[7] = ctx->hash[7] ^ (u[0] & 0xffff0000) ^ (u[0] << 16) ^ (u[1] & 0xffff0000) ^ (u[1] << 16) ^ (u[6] << 16) ^ (u[7] & 0xffff0000) ^ (u[7] >> 16); + + /* 61 rounds of LFSR, mixing up hash */ + ctx->hash[0] = (v[0] & 0xffff0000) ^ (v[0] << 16) ^ (v[0] >> 16) ^ + (v[1] >> 16) ^ (v[1] & 0xffff0000) ^ (v[2] << 16) ^ + (v[3] >> 16) ^ (v[4] << 16) ^ (v[5] >> 16) ^ v[5] ^ + (v[6] >> 16) ^ (v[7] << 16) ^ (v[7] >> 16) ^ (v[7] & 0xffff); + ctx->hash[1] = (v[0] << 16) ^ (v[0] >> 16) ^ (v[0] & 0xffff0000) ^ + (v[1] & 0xffff) ^ v[2] ^ (v[2] >> 16) ^ (v[3] << 16) ^ + (v[4] >> 16) ^ (v[5] << 16) ^ (v[6] << 16) ^ v[6] ^ + (v[7] & 0xffff0000) ^ (v[7] >> 16); + ctx->hash[2] = (v[0] & 0xffff) ^ (v[0] << 16) ^ (v[1] << 16) ^ + (v[1] >> 16) ^ (v[1] & 0xffff0000) ^ (v[2] << 16) ^ (v[3] >> 16) ^ + v[3] ^ (v[4] << 16) ^ (v[5] >> 16) ^ v[6] ^ (v[6] >> 16) ^ + (v[7] & 0xffff) ^ (v[7] << 16) ^ (v[7] >> 16); + ctx->hash[3] = (v[0] << 16) ^ (v[0] >> 16) ^ (v[0] & 0xffff0000) ^ + (v[1] & 0xffff0000) ^ (v[1] >> 16) ^ (v[2] << 16) ^ + (v[2] >> 16) ^ v[2] ^ (v[3] << 16) ^ (v[4] >> 16) ^ v[4] ^ + (v[5] << 16) ^ (v[6] << 16) ^ (v[7] & 0xffff) ^ (v[7] >> 16); + ctx->hash[4] = (v[0] >> 16) ^ (v[1] << 16) ^ v[1] ^ (v[2] >> 16) ^ v[2] ^ + (v[3] << 16) ^ (v[3] >> 16) ^ v[3] ^ (v[4] << 16) ^ + (v[5] >> 16) ^ v[5] ^ (v[6] << 16) ^ (v[6] >> 16) ^ (v[7] << 16); + ctx->hash[5] = (v[0] << 16) ^ (v[0] & 0xffff0000) ^ (v[1] << 16) ^ + (v[1] >> 16) ^ (v[1] & 0xffff0000) ^ (v[2] << 16) ^ v[2] ^ + (v[3] >> 16) ^ v[3] ^ (v[4] << 16) ^ (v[4] >> 16) ^ v[4] ^ + (v[5] << 16) ^ (v[6] << 16) ^ (v[6] >> 16) ^ v[6] ^ + (v[7] << 16) ^ (v[7] >> 16) ^ (v[7] & 0xffff0000); + ctx->hash[6] = v[0] ^ v[2] ^ (v[2] >> 16) ^ v[3] ^ (v[3] << 16) ^ v[4] ^ + (v[4] >> 16) ^ (v[5] << 16) ^ (v[5] >> 16) ^ v[5] ^ + (v[6] << 16) ^ (v[6] >> 16) ^ v[6] ^ (v[7] << 16) ^ v[7]; + ctx->hash[7] = v[0] ^ (v[0] >> 16) ^ (v[1] << 16) ^ (v[1] >> 16) ^ + (v[2] << 16) ^ (v[3] >> 16) ^ v[3] ^ (v[4] << 16) ^ v[4] ^ + (v[5] >> 16) ^ v[5] ^ (v[6] << 16) ^ (v[6] >> 16) ^ (v[7] << 16) ^ v[7]; +} + +/** + * This function calculates hash value by 256-bit blocks. + * It updates 256-bit check sum as follows: + * *(uint256_t)(ctx->sum) += *(uint256_t*)block; + * and then updates intermediate hash value ctx->hash + * by calling rhash_gost94_block_compress(). + * + * @param ctx algorithm context + * @param block the 256-bit message block to process + */ +static void rhash_gost94_compute_sum_and_hash(gost94_ctx * ctx, const uint* block, MAYBE_LOCAL const rhash_gost94_sbox *sbox) +{ +#if !__ENDIAN_LITTLE__ + uint block_le[8]; /* tmp buffer for little endian number */ +#define LOAD_BLOCK_LE(i) (block_le[i] = SWAP32(block[i])) +#else +#define block_le block +#define LOAD_BLOCK_LE(i) +#endif + + uint i, carry = 0; + + /* compute the 256-bit sum */ + for (i = 0; i < 8; i++) { + const uint old = ctx->sum[i]; + LOAD_BLOCK_LE(i); + ctx->sum[i] += block_le[i] + carry; + carry = (ctx->sum[i] < old || ctx->sum[i] < block_le[i] ? 1 : 0); + } + + /* update message hash */ + rhash_gost94_block_compress(ctx, block_le, sbox); +} + +#define IS_ALIGNED_32(p) (!(((const char*)(p) - (const char*)0) & 3)) + +/** + * Calculate message hash. + * Can be called repeatedly with chunks of the message to be hashed. + * + * @param ctx the algorithm context containing current hashing state + * @param msg message chunk + * @param size length of the message chunk + */ +inline void gost94_update(gost94_ctx *ctx, const uchar* msg, uint size, MAYBE_LOCAL const rhash_gost94_sbox *sbox) +{ + uint index = ctx->length & 31; + ctx->length += size; + + /* fill partial block */ + if (index) { + uint left = GOST94_BLOCK_SIZE - index; + memcpy_pp(ctx->message + index, msg, (size < left ? size : left)); + if (size < left) + return; + + /* process partial block */ + rhash_gost94_compute_sum_and_hash(ctx, (uint*)ctx->message, sbox); + msg += left; + size -= left; + } + while(size >= GOST94_BLOCK_SIZE) { + uint* aligned_message_block; + if (IS_ALIGNED_32(msg)) { + /* the most common case is processing of an already aligned message + on little-endian CPU without copying it */ + aligned_message_block = (uint*)msg; + } else { + memcpy_pp(ctx->message, msg, GOST94_BLOCK_SIZE); + aligned_message_block = (uint*)ctx->message; + } + + rhash_gost94_compute_sum_and_hash(ctx, aligned_message_block, sbox); + msg += GOST94_BLOCK_SIZE; + size -= GOST94_BLOCK_SIZE; + } + if (size) { + /* save leftovers */ + memcpy_pp(ctx->message, msg, size); + } +} + +#if !__ENDIAN_LITTLE__ +static void rhash_u32_swap_copy(void* to, const void* from, uint length) { + uint i; + uint *pO, *pI; + pO = (uint *)to; + pI = (uint *)from; + length >>= 2; + for (i = 0; i < length; ++i) { + *pO++ = SWAP32(*pI++); + } +} + +#define le32_copy(to, from, length) rhash_u32_swap_copy((to), (from), (length)) +#else /* !__ENDIAN_LITTLE__ */ +#define le32_copy(to, from, length) memcpy_pp((to), (from), (length)) +#endif /* !__ENDIAN_LITTLE__ */ + +/** + * Finish hashing and store message digest into given array. + * + * @param ctx the algorithm context containing current hashing state + * @param result calculated hash in binary form + */ +inline void gost94_final(gost94_ctx *ctx, uchar *result, MAYBE_LOCAL const rhash_gost94_sbox *sbox) +{ + uint index = ctx->length & 31; + uint* msg32 = (uint*)ctx->message; + + /* pad the last block with zeroes and hash it */ + if (index > 0) { + memset_p(ctx->message + index, 0, 32 - index); + rhash_gost94_compute_sum_and_hash(ctx, msg32, sbox); + } + + /* hash the message length and the sum */ + msg32[0] = ctx->length << 3; + msg32[1] = ctx->length >> 29; + memset_p(msg32 + 2, 0, sizeof(uint)*6); + + rhash_gost94_block_compress(ctx, msg32, sbox); + rhash_gost94_block_compress(ctx, ctx->sum, sbox); + + /* convert hash state to result bytes */ + le32_copy(result, ctx->hash, GOST94_HASH_LENGTH); +} + +/* ROTL macros rotate a 32-bit word left by n bits */ +#define ROTL32(dword, n) ((dword) << (n) ^ ((dword) >> (32 - (n)))) + +#if GOST94_FLAT_INIT +__constant uint precomp_table[1024] = { + 0x00072000U, 0x00075000U, 0x00074800U, 0x00071000U, 0x00076800U, 0x00074000U, 0x00070000U, 0x00077000U, + 0x00073000U, 0x00075800U, 0x00070800U, 0x00076000U, 0x00073800U, 0x00077800U, 0x00072800U, 0x00071800U, + 0x0005a000U, 0x0005d000U, 0x0005c800U, 0x00059000U, 0x0005e800U, 0x0005c000U, 0x00058000U, 0x0005f000U, + 0x0005b000U, 0x0005d800U, 0x00058800U, 0x0005e000U, 0x0005b800U, 0x0005f800U, 0x0005a800U, 0x00059800U, + 0x00022000U, 0x00025000U, 0x00024800U, 0x00021000U, 0x00026800U, 0x00024000U, 0x00020000U, 0x00027000U, + 0x00023000U, 0x00025800U, 0x00020800U, 0x00026000U, 0x00023800U, 0x00027800U, 0x00022800U, 0x00021800U, + 0x00062000U, 0x00065000U, 0x00064800U, 0x00061000U, 0x00066800U, 0x00064000U, 0x00060000U, 0x00067000U, + 0x00063000U, 0x00065800U, 0x00060800U, 0x00066000U, 0x00063800U, 0x00067800U, 0x00062800U, 0x00061800U, + 0x00032000U, 0x00035000U, 0x00034800U, 0x00031000U, 0x00036800U, 0x00034000U, 0x00030000U, 0x00037000U, + 0x00033000U, 0x00035800U, 0x00030800U, 0x00036000U, 0x00033800U, 0x00037800U, 0x00032800U, 0x00031800U, + 0x0006a000U, 0x0006d000U, 0x0006c800U, 0x00069000U, 0x0006e800U, 0x0006c000U, 0x00068000U, 0x0006f000U, + 0x0006b000U, 0x0006d800U, 0x00068800U, 0x0006e000U, 0x0006b800U, 0x0006f800U, 0x0006a800U, 0x00069800U, + 0x0007a000U, 0x0007d000U, 0x0007c800U, 0x00079000U, 0x0007e800U, 0x0007c000U, 0x00078000U, 0x0007f000U, + 0x0007b000U, 0x0007d800U, 0x00078800U, 0x0007e000U, 0x0007b800U, 0x0007f800U, 0x0007a800U, 0x00079800U, + 0x00052000U, 0x00055000U, 0x00054800U, 0x00051000U, 0x00056800U, 0x00054000U, 0x00050000U, 0x00057000U, + 0x00053000U, 0x00055800U, 0x00050800U, 0x00056000U, 0x00053800U, 0x00057800U, 0x00052800U, 0x00051800U, + 0x00012000U, 0x00015000U, 0x00014800U, 0x00011000U, 0x00016800U, 0x00014000U, 0x00010000U, 0x00017000U, + 0x00013000U, 0x00015800U, 0x00010800U, 0x00016000U, 0x00013800U, 0x00017800U, 0x00012800U, 0x00011800U, + 0x0001a000U, 0x0001d000U, 0x0001c800U, 0x00019000U, 0x0001e800U, 0x0001c000U, 0x00018000U, 0x0001f000U, + 0x0001b000U, 0x0001d800U, 0x00018800U, 0x0001e000U, 0x0001b800U, 0x0001f800U, 0x0001a800U, 0x00019800U, + 0x00042000U, 0x00045000U, 0x00044800U, 0x00041000U, 0x00046800U, 0x00044000U, 0x00040000U, 0x00047000U, + 0x00043000U, 0x00045800U, 0x00040800U, 0x00046000U, 0x00043800U, 0x00047800U, 0x00042800U, 0x00041800U, + 0x0000a000U, 0x0000d000U, 0x0000c800U, 0x00009000U, 0x0000e800U, 0x0000c000U, 0x00008000U, 0x0000f000U, + 0x0000b000U, 0x0000d800U, 0x00008800U, 0x0000e000U, 0x0000b800U, 0x0000f800U, 0x0000a800U, 0x00009800U, + 0x00002000U, 0x00005000U, 0x00004800U, 0x00001000U, 0x00006800U, 0x00004000U, 0x00000000U, 0x00007000U, + 0x00003000U, 0x00005800U, 0x00000800U, 0x00006000U, 0x00003800U, 0x00007800U, 0x00002800U, 0x00001800U, + 0x0003a000U, 0x0003d000U, 0x0003c800U, 0x00039000U, 0x0003e800U, 0x0003c000U, 0x00038000U, 0x0003f000U, + 0x0003b000U, 0x0003d800U, 0x00038800U, 0x0003e000U, 0x0003b800U, 0x0003f800U, 0x0003a800U, 0x00039800U, + 0x0002a000U, 0x0002d000U, 0x0002c800U, 0x00029000U, 0x0002e800U, 0x0002c000U, 0x00028000U, 0x0002f000U, + 0x0002b000U, 0x0002d800U, 0x00028800U, 0x0002e000U, 0x0002b800U, 0x0002f800U, 0x0002a800U, 0x00029800U, + 0x0004a000U, 0x0004d000U, 0x0004c800U, 0x00049000U, 0x0004e800U, 0x0004c000U, 0x00048000U, 0x0004f000U, + 0x0004b000U, 0x0004d800U, 0x00048800U, 0x0004e000U, 0x0004b800U, 0x0004f800U, 0x0004a800U, 0x00049800U, + 0x03a80000U, 0x03c00000U, 0x03880000U, 0x03e80000U, 0x03d00000U, 0x03980000U, 0x03a00000U, 0x03900000U, + 0x03f00000U, 0x03f80000U, 0x03e00000U, 0x03b80000U, 0x03b00000U, 0x03800000U, 0x03c80000U, 0x03d80000U, + 0x06a80000U, 0x06c00000U, 0x06880000U, 0x06e80000U, 0x06d00000U, 0x06980000U, 0x06a00000U, 0x06900000U, + 0x06f00000U, 0x06f80000U, 0x06e00000U, 0x06b80000U, 0x06b00000U, 0x06800000U, 0x06c80000U, 0x06d80000U, + 0x05280000U, 0x05400000U, 0x05080000U, 0x05680000U, 0x05500000U, 0x05180000U, 0x05200000U, 0x05100000U, + 0x05700000U, 0x05780000U, 0x05600000U, 0x05380000U, 0x05300000U, 0x05000000U, 0x05480000U, 0x05580000U, + 0x00a80000U, 0x00c00000U, 0x00880000U, 0x00e80000U, 0x00d00000U, 0x00980000U, 0x00a00000U, 0x00900000U, + 0x00f00000U, 0x00f80000U, 0x00e00000U, 0x00b80000U, 0x00b00000U, 0x00800000U, 0x00c80000U, 0x00d80000U, + 0x00280000U, 0x00400000U, 0x00080000U, 0x00680000U, 0x00500000U, 0x00180000U, 0x00200000U, 0x00100000U, + 0x00700000U, 0x00780000U, 0x00600000U, 0x00380000U, 0x00300000U, 0x00000000U, 0x00480000U, 0x00580000U, + 0x04280000U, 0x04400000U, 0x04080000U, 0x04680000U, 0x04500000U, 0x04180000U, 0x04200000U, 0x04100000U, + 0x04700000U, 0x04780000U, 0x04600000U, 0x04380000U, 0x04300000U, 0x04000000U, 0x04480000U, 0x04580000U, + 0x04a80000U, 0x04c00000U, 0x04880000U, 0x04e80000U, 0x04d00000U, 0x04980000U, 0x04a00000U, 0x04900000U, + 0x04f00000U, 0x04f80000U, 0x04e00000U, 0x04b80000U, 0x04b00000U, 0x04800000U, 0x04c80000U, 0x04d80000U, + 0x07a80000U, 0x07c00000U, 0x07880000U, 0x07e80000U, 0x07d00000U, 0x07980000U, 0x07a00000U, 0x07900000U, + 0x07f00000U, 0x07f80000U, 0x07e00000U, 0x07b80000U, 0x07b00000U, 0x07800000U, 0x07c80000U, 0x07d80000U, + 0x07280000U, 0x07400000U, 0x07080000U, 0x07680000U, 0x07500000U, 0x07180000U, 0x07200000U, 0x07100000U, + 0x07700000U, 0x07780000U, 0x07600000U, 0x07380000U, 0x07300000U, 0x07000000U, 0x07480000U, 0x07580000U, + 0x02280000U, 0x02400000U, 0x02080000U, 0x02680000U, 0x02500000U, 0x02180000U, 0x02200000U, 0x02100000U, + 0x02700000U, 0x02780000U, 0x02600000U, 0x02380000U, 0x02300000U, 0x02000000U, 0x02480000U, 0x02580000U, + 0x03280000U, 0x03400000U, 0x03080000U, 0x03680000U, 0x03500000U, 0x03180000U, 0x03200000U, 0x03100000U, + 0x03700000U, 0x03780000U, 0x03600000U, 0x03380000U, 0x03300000U, 0x03000000U, 0x03480000U, 0x03580000U, + 0x06280000U, 0x06400000U, 0x06080000U, 0x06680000U, 0x06500000U, 0x06180000U, 0x06200000U, 0x06100000U, + 0x06700000U, 0x06780000U, 0x06600000U, 0x06380000U, 0x06300000U, 0x06000000U, 0x06480000U, 0x06580000U, + 0x05a80000U, 0x05c00000U, 0x05880000U, 0x05e80000U, 0x05d00000U, 0x05980000U, 0x05a00000U, 0x05900000U, + 0x05f00000U, 0x05f80000U, 0x05e00000U, 0x05b80000U, 0x05b00000U, 0x05800000U, 0x05c80000U, 0x05d80000U, + 0x01280000U, 0x01400000U, 0x01080000U, 0x01680000U, 0x01500000U, 0x01180000U, 0x01200000U, 0x01100000U, + 0x01700000U, 0x01780000U, 0x01600000U, 0x01380000U, 0x01300000U, 0x01000000U, 0x01480000U, 0x01580000U, + 0x02a80000U, 0x02c00000U, 0x02880000U, 0x02e80000U, 0x02d00000U, 0x02980000U, 0x02a00000U, 0x02900000U, + 0x02f00000U, 0x02f80000U, 0x02e00000U, 0x02b80000U, 0x02b00000U, 0x02800000U, 0x02c80000U, 0x02d80000U, + 0x01a80000U, 0x01c00000U, 0x01880000U, 0x01e80000U, 0x01d00000U, 0x01980000U, 0x01a00000U, 0x01900000U, + 0x01f00000U, 0x01f80000U, 0x01e00000U, 0x01b80000U, 0x01b00000U, 0x01800000U, 0x01c80000U, 0x01d80000U, + 0x30000002U, 0x60000002U, 0x38000002U, 0x08000002U, 0x28000002U, 0x78000002U, 0x68000002U, 0x40000002U, + 0x20000002U, 0x50000002U, 0x48000002U, 0x70000002U, 0x00000002U, 0x18000002U, 0x58000002U, 0x10000002U, + 0xb0000005U, 0xe0000005U, 0xb8000005U, 0x88000005U, 0xa8000005U, 0xf8000005U, 0xe8000005U, 0xc0000005U, + 0xa0000005U, 0xd0000005U, 0xc8000005U, 0xf0000005U, 0x80000005U, 0x98000005U, 0xd8000005U, 0x90000005U, + 0x30000005U, 0x60000005U, 0x38000005U, 0x08000005U, 0x28000005U, 0x78000005U, 0x68000005U, 0x40000005U, + 0x20000005U, 0x50000005U, 0x48000005U, 0x70000005U, 0x00000005U, 0x18000005U, 0x58000005U, 0x10000005U, + 0x30000000U, 0x60000000U, 0x38000000U, 0x08000000U, 0x28000000U, 0x78000000U, 0x68000000U, 0x40000000U, + 0x20000000U, 0x50000000U, 0x48000000U, 0x70000000U, 0x00000000U, 0x18000000U, 0x58000000U, 0x10000000U, + 0xb0000003U, 0xe0000003U, 0xb8000003U, 0x88000003U, 0xa8000003U, 0xf8000003U, 0xe8000003U, 0xc0000003U, + 0xa0000003U, 0xd0000003U, 0xc8000003U, 0xf0000003U, 0x80000003U, 0x98000003U, 0xd8000003U, 0x90000003U, + 0x30000001U, 0x60000001U, 0x38000001U, 0x08000001U, 0x28000001U, 0x78000001U, 0x68000001U, 0x40000001U, + 0x20000001U, 0x50000001U, 0x48000001U, 0x70000001U, 0x00000001U, 0x18000001U, 0x58000001U, 0x10000001U, + 0xb0000000U, 0xe0000000U, 0xb8000000U, 0x88000000U, 0xa8000000U, 0xf8000000U, 0xe8000000U, 0xc0000000U, + 0xa0000000U, 0xd0000000U, 0xc8000000U, 0xf0000000U, 0x80000000U, 0x98000000U, 0xd8000000U, 0x90000000U, + 0xb0000006U, 0xe0000006U, 0xb8000006U, 0x88000006U, 0xa8000006U, 0xf8000006U, 0xe8000006U, 0xc0000006U, + 0xa0000006U, 0xd0000006U, 0xc8000006U, 0xf0000006U, 0x80000006U, 0x98000006U, 0xd8000006U, 0x90000006U, + 0xb0000001U, 0xe0000001U, 0xb8000001U, 0x88000001U, 0xa8000001U, 0xf8000001U, 0xe8000001U, 0xc0000001U, + 0xa0000001U, 0xd0000001U, 0xc8000001U, 0xf0000001U, 0x80000001U, 0x98000001U, 0xd8000001U, 0x90000001U, + 0x30000003U, 0x60000003U, 0x38000003U, 0x08000003U, 0x28000003U, 0x78000003U, 0x68000003U, 0x40000003U, + 0x20000003U, 0x50000003U, 0x48000003U, 0x70000003U, 0x00000003U, 0x18000003U, 0x58000003U, 0x10000003U, + 0x30000004U, 0x60000004U, 0x38000004U, 0x08000004U, 0x28000004U, 0x78000004U, 0x68000004U, 0x40000004U, + 0x20000004U, 0x50000004U, 0x48000004U, 0x70000004U, 0x00000004U, 0x18000004U, 0x58000004U, 0x10000004U, + 0xb0000002U, 0xe0000002U, 0xb8000002U, 0x88000002U, 0xa8000002U, 0xf8000002U, 0xe8000002U, 0xc0000002U, + 0xa0000002U, 0xd0000002U, 0xc8000002U, 0xf0000002U, 0x80000002U, 0x98000002U, 0xd8000002U, 0x90000002U, + 0xb0000004U, 0xe0000004U, 0xb8000004U, 0x88000004U, 0xa8000004U, 0xf8000004U, 0xe8000004U, 0xc0000004U, + 0xa0000004U, 0xd0000004U, 0xc8000004U, 0xf0000004U, 0x80000004U, 0x98000004U, 0xd8000004U, 0x90000004U, + 0x30000006U, 0x60000006U, 0x38000006U, 0x08000006U, 0x28000006U, 0x78000006U, 0x68000006U, 0x40000006U, + 0x20000006U, 0x50000006U, 0x48000006U, 0x70000006U, 0x00000006U, 0x18000006U, 0x58000006U, 0x10000006U, + 0xb0000007U, 0xe0000007U, 0xb8000007U, 0x88000007U, 0xa8000007U, 0xf8000007U, 0xe8000007U, 0xc0000007U, + 0xa0000007U, 0xd0000007U, 0xc8000007U, 0xf0000007U, 0x80000007U, 0x98000007U, 0xd8000007U, 0x90000007U, + 0x30000007U, 0x60000007U, 0x38000007U, 0x08000007U, 0x28000007U, 0x78000007U, 0x68000007U, 0x40000007U, + 0x20000007U, 0x50000007U, 0x48000007U, 0x70000007U, 0x00000007U, 0x18000007U, 0x58000007U, 0x10000007U, + 0x000000e8U, 0x000000d8U, 0x000000a0U, 0x00000088U, 0x00000098U, 0x000000f8U, 0x000000a8U, 0x000000c8U, + 0x00000080U, 0x000000d0U, 0x000000f0U, 0x000000b8U, 0x000000b0U, 0x000000c0U, 0x00000090U, 0x000000e0U, + 0x000007e8U, 0x000007d8U, 0x000007a0U, 0x00000788U, 0x00000798U, 0x000007f8U, 0x000007a8U, 0x000007c8U, + 0x00000780U, 0x000007d0U, 0x000007f0U, 0x000007b8U, 0x000007b0U, 0x000007c0U, 0x00000790U, 0x000007e0U, + 0x000006e8U, 0x000006d8U, 0x000006a0U, 0x00000688U, 0x00000698U, 0x000006f8U, 0x000006a8U, 0x000006c8U, + 0x00000680U, 0x000006d0U, 0x000006f0U, 0x000006b8U, 0x000006b0U, 0x000006c0U, 0x00000690U, 0x000006e0U, + 0x00000068U, 0x00000058U, 0x00000020U, 0x00000008U, 0x00000018U, 0x00000078U, 0x00000028U, 0x00000048U, + 0x00000000U, 0x00000050U, 0x00000070U, 0x00000038U, 0x00000030U, 0x00000040U, 0x00000010U, 0x00000060U, + 0x000002e8U, 0x000002d8U, 0x000002a0U, 0x00000288U, 0x00000298U, 0x000002f8U, 0x000002a8U, 0x000002c8U, + 0x00000280U, 0x000002d0U, 0x000002f0U, 0x000002b8U, 0x000002b0U, 0x000002c0U, 0x00000290U, 0x000002e0U, + 0x000003e8U, 0x000003d8U, 0x000003a0U, 0x00000388U, 0x00000398U, 0x000003f8U, 0x000003a8U, 0x000003c8U, + 0x00000380U, 0x000003d0U, 0x000003f0U, 0x000003b8U, 0x000003b0U, 0x000003c0U, 0x00000390U, 0x000003e0U, + 0x00000568U, 0x00000558U, 0x00000520U, 0x00000508U, 0x00000518U, 0x00000578U, 0x00000528U, 0x00000548U, + 0x00000500U, 0x00000550U, 0x00000570U, 0x00000538U, 0x00000530U, 0x00000540U, 0x00000510U, 0x00000560U, + 0x00000268U, 0x00000258U, 0x00000220U, 0x00000208U, 0x00000218U, 0x00000278U, 0x00000228U, 0x00000248U, + 0x00000200U, 0x00000250U, 0x00000270U, 0x00000238U, 0x00000230U, 0x00000240U, 0x00000210U, 0x00000260U, + 0x000004e8U, 0x000004d8U, 0x000004a0U, 0x00000488U, 0x00000498U, 0x000004f8U, 0x000004a8U, 0x000004c8U, + 0x00000480U, 0x000004d0U, 0x000004f0U, 0x000004b8U, 0x000004b0U, 0x000004c0U, 0x00000490U, 0x000004e0U, + 0x00000168U, 0x00000158U, 0x00000120U, 0x00000108U, 0x00000118U, 0x00000178U, 0x00000128U, 0x00000148U, + 0x00000100U, 0x00000150U, 0x00000170U, 0x00000138U, 0x00000130U, 0x00000140U, 0x00000110U, 0x00000160U, + 0x000001e8U, 0x000001d8U, 0x000001a0U, 0x00000188U, 0x00000198U, 0x000001f8U, 0x000001a8U, 0x000001c8U, + 0x00000180U, 0x000001d0U, 0x000001f0U, 0x000001b8U, 0x000001b0U, 0x000001c0U, 0x00000190U, 0x000001e0U, + 0x00000768U, 0x00000758U, 0x00000720U, 0x00000708U, 0x00000718U, 0x00000778U, 0x00000728U, 0x00000748U, + 0x00000700U, 0x00000750U, 0x00000770U, 0x00000738U, 0x00000730U, 0x00000740U, 0x00000710U, 0x00000760U, + 0x00000368U, 0x00000358U, 0x00000320U, 0x00000308U, 0x00000318U, 0x00000378U, 0x00000328U, 0x00000348U, + 0x00000300U, 0x00000350U, 0x00000370U, 0x00000338U, 0x00000330U, 0x00000340U, 0x00000310U, 0x00000360U, + 0x000005e8U, 0x000005d8U, 0x000005a0U, 0x00000588U, 0x00000598U, 0x000005f8U, 0x000005a8U, 0x000005c8U, + 0x00000580U, 0x000005d0U, 0x000005f0U, 0x000005b8U, 0x000005b0U, 0x000005c0U, 0x00000590U, 0x000005e0U, + 0x00000468U, 0x00000458U, 0x00000420U, 0x00000408U, 0x00000418U, 0x00000478U, 0x00000428U, 0x00000448U, + 0x00000400U, 0x00000450U, 0x00000470U, 0x00000438U, 0x00000430U, 0x00000440U, 0x00000410U, 0x00000460U, + 0x00000668U, 0x00000658U, 0x00000620U, 0x00000608U, 0x00000618U, 0x00000678U, 0x00000628U, 0x00000648U, + 0x00000600U, 0x00000650U, 0x00000670U, 0x00000638U, 0x00000630U, 0x00000640U, 0x00000610U, 0x00000660U, +}; +#elif GOST94_CRYPTPRO +/* Parameter set recommended by RFC 4357. + * Eight 4-bit S-Boxes as defined by RFC 4357 section 11.2 */ +__constant uchar sbox[8][16] = { + { 10, 4, 5, 6, 8, 1, 3, 7, 13, 12, 14, 0, 9, 2, 11, 15 }, + { 5, 15, 4, 0, 2, 13, 11, 9, 1, 7, 6, 3, 12, 14, 10, 8 }, + { 7, 15, 12, 14, 9, 4, 1, 0, 3, 11, 5, 2, 6, 10, 8, 13 }, + { 4, 10, 7, 12, 0, 15, 2, 8, 14, 1, 6, 5, 13, 11, 9, 3 }, + { 7, 6, 4, 11, 9, 12, 2, 10, 1, 8, 0, 14, 15, 13, 3, 5 }, + { 7, 6, 2, 4, 13, 9, 15, 0, 10, 1, 5, 11, 8, 14, 12, 3 }, + { 13, 14, 4, 1, 7, 0, 5, 10, 3, 12, 8, 15, 6, 2, 9, 11 }, + { 1, 3, 10, 9, 5, 11, 4, 15, 8, 6, 7, 14, 13, 0, 2, 12 } +}; +#else +/* Test parameters set. Eight 4-bit S-Boxes defined by GOST R 34.10-94 + * standard for testing the hash function. + * Also given by RFC 4357 section 11.2 */ +__constant uchar sbox[8][16] = { + { 4, 10, 9, 2, 13, 8, 0, 14, 6, 11, 1, 12, 7, 15, 5, 3 }, + { 14, 11, 4, 12, 6, 13, 15, 10, 2, 3, 8, 1, 0, 7, 5, 9 }, + { 5, 8, 1, 13, 10, 3, 4, 2, 14, 15, 12, 7, 6, 0, 9, 11 }, + { 7, 13, 10, 1, 0, 8, 9, 15, 14, 4, 6, 12, 11, 2, 5, 3 }, + { 6, 12, 7, 1, 5, 15, 13, 8, 4, 10, 9, 14, 0, 3, 11, 2 }, + { 4, 11, 10, 0, 7, 2, 1, 13, 3, 6, 8, 5, 9, 12, 15, 14 }, + { 13, 11, 4, 1, 3, 15, 5, 9, 0, 10, 14, 7, 6, 8, 2, 12 }, + { 1, 15, 13, 0, 5, 7, 10, 4, 9, 2, 3, 14, 6, 11, 8, 12 } +}; +#endif + +/** + * Initialize the GOST lookup tables for both parameters sets. + * The lookup table contain 4 KiB in total, so calculating + * it at run-time can save a little space in the executable file + * in trade of consuming some time at program start. + */ +inline void gost94_init_table(MAYBE_LOCAL rhash_gost94_sbox *cur_sbox) +{ + uint lid = get_local_id(0); +#if GOST94_FLAT_INIT + uint ls = get_local_size(0); + uint i; + + for (i = lid; i < 1024; i += ls) + cur_sbox->flat[i] = precomp_table[i]; +#else + uint a, b, i; + uint ax, bx, cx, dx; + + if (lid == 0) { + for (i = 0, a = 0; a < 16; a++) { + ax = (uint)sbox[1][a] << 15; + bx = (uint)sbox[3][a] << 23; + cx = ROTL32((uint)sbox[5][a], 31); + dx = (uint)sbox[7][a] << 7; + + for (b = 0; b < 16; b++, i++) { + cur_sbox->array[0][i] = ax | ((uint)sbox[0][b] << 11); + cur_sbox->array[1][i] = bx | ((uint)sbox[2][b] << 19); + cur_sbox->array[2][i] = cx | ((uint)sbox[4][b] << 27); + cur_sbox->array[3][i] = dx | ((uint)sbox[6][b] << 3); + } + } + } +#endif +#if GOST94_USE_LOCAL + barrier(CLK_LOCAL_MEM_FENCE); +#endif +} + +#endif /* GOST94_H */ diff --git a/run/opencl/opencl_streebog.h b/run/opencl/opencl_streebog.h new file mode 100644 index 0000000000..801e7c8449 --- /dev/null +++ b/run/opencl/opencl_streebog.h @@ -0,0 +1,971 @@ +/* + * Copyright (c) 2013, Alexey Degtyarev . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Porting to OpenCL + optimizations: Copyright (c) 2022 magnum, and those changes + * hereby released to the general public under the following terms: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#ifndef _OPENCL_STREEBOG_H +#define _OPENCL_STREEBOG_H + +#include "opencl_misc.h" + +#if STREEBOG_LOCAL_AX +#define AX loc_buf->Ax +#else +#define AX Ax +#endif + +#if STREEBOG_UNROLL +#define UNROLL8 _Pragma("unroll 8") +#define UNROLL16 _Pragma("unroll 16") +#else +#define UNROLL8 +#define UNROLL16 +#endif + +#if STREEBOG_VECTOR +#define memcpy512(dst, src) do { \ + (dst)->VWORD = (src)->VWORD; \ + } while (0) +#define memcpy256 memcpy512 +#else +#define memcpy512(dst, src) do { \ + UNROLL16 \ + for (uint i = 0; i < 16; i++) \ + (dst)->DWORD[i] = (src)->DWORD[i]; \ + } while (0) + +#define memcpy256(dst, src) do { \ + UNROLL8 \ + for (uint i = 0; i < 8; i++) \ + (dst)->DWORD[i] = (src)->DWORD[i]; \ + } while (0) +#endif + +typedef union { + ulong QWORD[4]; + uint DWORD[8]; + uint8 VWORD; + uchar BYTES[32]; +} uint256_u; + +typedef union { + ulong QWORD[8]; + uint DWORD[16]; + uint16 VWORD; + uchar BYTES[64]; +} uint512_u; + +typedef struct { + uint512_u buffer; + uint512_u h; + uint512_u N; + uint512_u Sigma; + uint bufsize; +#if !STREEBOG512CRYPT && !STREEBOG256CRYPT + uint digest_size; +#endif +} GOST34112012Context; + +typedef struct { + ulong Ax[8][256]; +} localbuf; + +/* let z be 512-bit XOR of x and y */ +#if STREEBOG_VECTOR +#define XOR512(x, y, z) do { \ + (z)->VWORD = (x)->VWORD ^ (y)->VWORD; \ + } while (0) +#else +#define XOR512(x, y, z) do { \ + UNROLL16 \ + for (uint _i = 0; _i < 16; _i++) \ + (z)->DWORD[_i] = (x)->DWORD[_i] ^ (y)->DWORD[_i]; \ + } while (0) +#endif + +/* The main Streebog headache operation */ +#define XLPS(x, y, data) do { \ + uint512_u r; \ + \ + XOR512((x), (y), &(r)); \ + UNROLL8 \ + for (uint _i = 0; _i <= 7; _i++) \ + { \ + (data)->QWORD[_i] = AX[0][(r.QWORD[0] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[1][(r.QWORD[1] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[2][(r.QWORD[2] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[3][(r.QWORD[3] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[4][(r.QWORD[4] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[5][(r.QWORD[5] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[6][(r.QWORD[6] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[7][(r.QWORD[7] >> (_i << 3)) & 0xFF]; \ + } \ + } while (0) + +// Special case of the above where y is all zeros +#define XLPS0(x, data) do { \ + UNROLL8 \ + for (uint _i = 0; _i <= 7; _i++) \ + { \ + (data)->QWORD[_i] = AX[0][((x)->QWORD[0] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[1][((x)->QWORD[1] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[2][((x)->QWORD[2] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[3][((x)->QWORD[3] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[4][((x)->QWORD[4] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[5][((x)->QWORD[5] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[6][((x)->QWORD[6] >> (_i << 3)) & 0xFF]; \ + (data)->QWORD[_i] ^= AX[7][((x)->QWORD[7] >> (_i << 3)) & 0xFF]; \ + } \ + } while (0) + +#define ROUND(i, Ki, data) do { \ + XLPS(Ki, &C[i], Ki); \ + XLPS(Ki, data, data); \ + } while (0) + +__constant const uint512_u C[12] = { + {{ + 0xdd806559f2a64507UL, 0x05767436cc744d23UL, 0xa2422a08a460d315UL, 0x4b7ce09192676901UL, + 0x714eb88d7585c4fcUL, 0x2f6a76432e45d016UL, 0xebcb2f81c0657c1fUL, 0xb1085bda1ecadae9UL + }}, + {{ + 0xe679047021b19bb7UL, 0x55dda21bd7cbcd56UL, 0x5cb561c2db0aa7caUL, 0x9ab5176b12d69958UL, + 0x61d55e0f16b50131UL, 0xf3feea720a232b98UL, 0x4fe39d460f70b5d7UL, 0x6fa3b58aa99d2f1aUL + }}, + {{ + 0x991e96f50aba0ab2UL, 0xc2b6f443867adb31UL, 0xc1c93a376062db09UL, 0xd3e20fe490359eb1UL, + 0xf2ea7514b1297b7bUL, 0x06f15e5f529c1f8bUL, 0x0a39fc286a3d8435UL, 0xf574dcac2bce2fc7UL + }}, + {{ + 0x220cbebc84e3d12eUL, 0x3453eaa193e837f1UL, 0xd8b71333935203beUL, 0xa9d72c82ed03d675UL, + 0x9d721cad685e353fUL, 0x488e857e335c3c7dUL, 0xf948e1a05d71e4ddUL, 0xef1fdfb3e81566d2UL + }}, + {{ + 0x601758fd7c6cfe57UL, 0x7a56a27ea9ea63f5UL, 0xdfff00b723271a16UL, 0xbfcd1747253af5a3UL, + 0x359e35d7800fffbdUL, 0x7f151c1f1686104aUL, 0x9a3f410c6ca92363UL, 0x4bea6bacad474799UL + }}, + {{ + 0xfa68407a46647d6eUL, 0xbf71c57236904f35UL, 0x0af21f66c2bec6b6UL, 0xcffaa6b71c9ab7b4UL, + 0x187f9ab49af08ec6UL, 0x2d66c4f95142a46cUL, 0x6fa4c33b7a3039c0UL, 0xae4faeae1d3ad3d9UL + }}, + {{ + 0x8886564d3a14d493UL, 0x3517454ca23c4af3UL, 0x06476983284a0504UL, 0x0992abc52d822c37UL, + 0xd3473e33197a93c9UL, 0x399ec6c7e6bf87c9UL, 0x51ac86febf240954UL, 0xf4c70e16eeaac5ecUL + }}, + {{ + 0xa47f0dd4bf02e71eUL, 0x36acc2355951a8d9UL, 0x69d18d2bd1a5c42fUL, 0xf4892bcb929b0690UL, + 0x89b4443b4ddbc49aUL, 0x4eb7f8719c36de1eUL, 0x03e7aa020c6e4141UL, 0x9b1f5b424d93c9a7UL + }}, + {{ + 0x7261445183235adbUL, 0x0e38dc92cb1f2a60UL, 0x7b2b8a9aa6079c54UL, 0x800a440bdbb2ceb1UL, + 0x3cd955b7e00d0984UL, 0x3a7d3a1b25894224UL, 0x944c9ad8ec165fdeUL, 0x378f5a541631229bUL + }}, + {{ + 0x74b4c7fb98459cedUL, 0x3698fad1153bb6c3UL, 0x7a1e6c303b7652f4UL, 0x9fe76702af69334bUL, + 0x1fffe18a1b336103UL, 0x8941e71cff8a78dbUL, 0x382ae548b2e4f3f3UL, 0xabbedea680056f52UL + }}, + {{ + 0x6bcaa4cd81f32d1bUL, 0xdea2594ac06fd85dUL, 0xefbacd1d7d476e98UL, 0x8a1d71efea48b9caUL, + 0x2001802114846679UL, 0xd8fa6bbbebab0761UL, 0x3002c6cd635afe94UL, 0x7bcd9ed0efc889fbUL + }}, + {{ + 0x48bc924af11bd720UL, 0xfaf417d5d9b21b99UL, 0xe71da4aa88e12852UL, 0x5d80ef9d1891cc86UL, + 0xf82012d430219f9bUL, 0xcda43c32bcdf1d77UL, 0xd21380b00449b17aUL, 0x378ee767f11631baUL + }} +}; + +__constant const ulong Ax[8][256] = { + { + 0xd01f715b5c7ef8e6UL, 0x16fa240980778325UL, 0xa8a42e857ee049c8UL, 0x6ac1068fa186465bUL, + 0x6e417bd7a2e9320bUL, 0x665c8167a437daabUL, 0x7666681aa89617f6UL, 0x4b959163700bdcf5UL, + 0xf14be6b78df36248UL, 0xc585bd689a625cffUL, 0x9557d7fca67d82cbUL, 0x89f0b969af6dd366UL, + 0xb0833d48749f6c35UL, 0xa1998c23b1ecbc7cUL, 0x8d70c431ac02a736UL, 0xd6dfbc2fd0a8b69eUL, + 0x37aeb3e551fa198bUL, 0x0b7d128a40b5cf9cUL, 0x5a8f2008b5780cbcUL, 0xedec882284e333e5UL, + 0xd25fc177d3c7c2ceUL, 0x5e0f5d50b61778ecUL, 0x1d873683c0c24cb9UL, 0xad040bcbb45d208cUL, + 0x2f89a0285b853c76UL, 0x5732fff6791b8d58UL, 0x3e9311439ef6ec3fUL, 0xc9183a809fd3c00fUL, + 0x83adf3f5260a01eeUL, 0xa6791941f4e8ef10UL, 0x103ae97d0ca1cd5dUL, 0x2ce948121dee1b4aUL, + 0x39738421dbf2bf53UL, 0x093da2a6cf0cf5b4UL, 0xcd9847d89cbcb45fUL, 0xf9561c078b2d8ae8UL, + 0x9c6a755a6971777fUL, 0xbc1ebaa0712ef0c5UL, 0x72e61542abf963a6UL, 0x78bb5fde229eb12eUL, + 0x14ba94250fceb90dUL, 0x844d6697630e5282UL, 0x98ea08026a1e032fUL, 0xf06bbea144217f5cUL, + 0xdb6263d11ccb377aUL, 0x641c314b2b8ee083UL, 0x320e96ab9b4770cfUL, 0x1ee7deb986a96b85UL, + 0xe96cf57a878c47b5UL, 0xfdd6615f8842feb8UL, 0xc83862965601dd1bUL, 0x2ea9f83e92572162UL, + 0xf876441142ff97fcUL, 0xeb2c455608357d9dUL, 0x5612a7e0b0c9904cUL, 0x6c01cbfb2d500823UL, + 0x4548a6a7fa037a2dUL, 0xabc4c6bf388b6ef4UL, 0xbade77d4fdf8bebdUL, 0x799b07c8eb4cac3aUL, + 0x0c9d87e805b19cf0UL, 0xcb588aac106afa27UL, 0xea0c1d40c1e76089UL, 0x2869354a1e816f1aUL, + 0xff96d17307fbc490UL, 0x9f0a9d602f1a5043UL, 0x96373fc6e016a5f7UL, 0x5292dab8b3a6e41cUL, + 0x9b8ae0382c752413UL, 0x4f15ec3b7364a8a5UL, 0x3fb349555724f12bUL, 0xc7c50d4415db66d7UL, + 0x92b7429ee379d1a7UL, 0xd37f99611a15dfdaUL, 0x231427c05e34a086UL, 0xa439a96d7b51d538UL, + 0xb403401077f01865UL, 0xdda2aea5901d7902UL, 0x0a5d4a9c8967d288UL, 0xc265280adf660f93UL, + 0x8bb0094520d4e94eUL, 0x2a29856691385532UL, 0x42a833c5bf072941UL, 0x73c64d54622b7eb2UL, + 0x07e095624504536cUL, 0x8a905153e906f45aUL, 0x6f6123c16b3b2f1fUL, 0xc6e55552dc097bc3UL, + 0x4468feb133d16739UL, 0xe211e7f0c7398829UL, 0xa2f96419f7879b40UL, 0x19074bdbc3ad38e9UL, + 0xf4ebc3f9474e0b0cUL, 0x43886bd376d53455UL, 0xd8028beb5aa01046UL, 0x51f23282f5cdc320UL, + 0xe7b1c2be0d84e16dUL, 0x081dfab006dee8a0UL, 0x3b33340d544b857bUL, 0x7f5bcabc679ae242UL, + 0x0edd37c48a08a6d8UL, 0x81ed43d9a9b33bc6UL, 0xb1a3655ebd4d7121UL, 0x69a1eeb5e7ed6167UL, + 0xf6ab73d5c8f73124UL, 0x1a67a3e185c61fd5UL, 0x2dc91004d43c065eUL, 0x0240b02c8fb93a28UL, + 0x90f7f2b26cc0eb8fUL, 0x3cd3a16f114fd617UL, 0xaae49ea9f15973e0UL, 0x06c0cd748cd64e78UL, + 0xda423bc7d5192a6eUL, 0xc345701c16b41287UL, 0x6d2193ede4821537UL, 0xfcf639494190e3acUL, + 0x7c3b228621f1c57eUL, 0xfb16ac2b0494b0c0UL, 0xbf7e529a3745d7f9UL, 0x6881b6a32e3f7c73UL, + 0xca78d2bad9b8e733UL, 0xbbfe2fc2342aa3a9UL, 0x0dbddffecc6381e4UL, 0x70a6a56e2440598eUL, + 0xe4d12a844befc651UL, 0x8c509c2765d0ba22UL, 0xee8c6018c28814d9UL, 0x17da7c1f49a59e31UL, + 0x609c4c1328e194d3UL, 0xb3e3d57232f44b09UL, 0x91d7aaa4a512f69bUL, 0x0ffd6fd243dabbccUL, + 0x50d26a943c1fde34UL, 0x6be15e9968545b4fUL, 0x94778fea6faf9fdfUL, 0x2b09dd7058ea4826UL, + 0x677cd9716de5c7bfUL, 0x49d5214fffb2e6ddUL, 0x0360e83a466b273cUL, 0x1fc786af4f7b7691UL, + 0xa0b9d435783ea168UL, 0xd49f0c035f118cb6UL, 0x01205816c9d21d14UL, 0xac2453dd7d8f3d98UL, + 0x545217cc3f70aa64UL, 0x26b4028e9489c9c2UL, 0xdec2469fd6765e3eUL, 0x04807d58036f7450UL, + 0xe5f17292823ddb45UL, 0xf30b569b024a5860UL, 0x62dcfc3fa758aefbUL, 0xe84cad6c4e5e5aa1UL, + 0xccb81fce556ea94bUL, 0x53b282ae7a74f908UL, 0x1b47fbf74c1402c1UL, 0x368eebf39828049fUL, + 0x7afbeff2ad278b06UL, 0xbe5e0a8cfe97caedUL, 0xcfd8f7f413058e77UL, 0xf78b2bc301252c30UL, + 0x4d555c17fcdd928dUL, 0x5f2f05467fc565f8UL, 0x24f4b2a21b30f3eaUL, 0x860dd6bbecb768aaUL, + 0x4c750401350f8f99UL, 0x0000000000000000UL, 0xecccd0344d312ef1UL, 0xb5231806be220571UL, + 0xc105c030990d28afUL, 0x653c695de25cfd97UL, 0x159acc33c61ca419UL, 0xb89ec7f872418495UL, + 0xa9847693b73254dcUL, 0x58cf90243ac13694UL, 0x59efc832f3132b80UL, 0x5c4fed7c39ae42c4UL, + 0x828dabe3efd81cfaUL, 0xd13f294d95ace5f2UL, 0x7d1b7a90e823d86aUL, 0xb643f03cf849224dUL, + 0x3df3f979d89dcb03UL, 0x7426d836272f2ddeUL, 0xdfe21e891fa4432aUL, 0x3a136c1b9d99986fUL, + 0xfa36f43dcd46add4UL, 0xc025982650df35bbUL, 0x856d3e81aadc4f96UL, 0xc4a5e57e53b041ebUL, + 0x4708168b75ba4005UL, 0xaf44bbe73be41aa4UL, 0x971767d029c4b8e3UL, 0xb9be9feebb939981UL, + 0x215497ecd18d9aaeUL, 0x316e7e91dd2c57f3UL, 0xcef8afe2dad79363UL, 0x3853dc371220a247UL, + 0x35ee03c9de4323a3UL, 0xe6919aa8c456fc79UL, 0xe05157dc4880b201UL, 0x7bdbb7e464f59612UL, + 0x127a59518318f775UL, 0x332ecebd52956ddbUL, 0x8f30741d23bb9d1eUL, 0xd922d3fd93720d52UL, + 0x7746300c61440ae2UL, 0x25d4eab4d2e2eefeUL, 0x75068020eefd30caUL, 0x135a01474acaea61UL, + 0x304e268714fe4ae7UL, 0xa519f17bb283c82cUL, 0xdc82f6b359cf6416UL, 0x5baf781e7caa11a8UL, + 0xb2c38d64fb26561dUL, 0x34ce5bdf17913eb7UL, 0x5d6fb56af07c5fd0UL, 0x182713cd0a7f25fdUL, + 0x9e2ac576e6c84d57UL, 0x9aaab82ee5a73907UL, 0xa3d93c0f3e558654UL, 0x7e7b92aaae48ff56UL, + 0x872d8ead256575beUL, 0x41c8dbfff96c0e7dUL, 0x99ca5014a3cc1e3bUL, 0x40e883e930be1369UL, + 0x1ca76e95091051adUL, 0x4e35b42dbab6b5b1UL, 0x05a0254ecabd6944UL, 0xe1710fca8152af15UL, + 0xf22b0e8dcb984574UL, 0xb763a82a319b3f59UL, 0x63fca4296e8ab3efUL, 0x9d4a2d4ca0a36a6bUL, + 0xe331bfe60eeb953dUL, 0xd5bf541596c391a2UL, 0xf5cb9bef8e9c1618UL, 0x46284e9dbc685d11UL, + 0x2074cffa185f87baUL, 0xbd3ee2b6b8fcedd1UL, 0xae64e3f1f23607b0UL, 0xfeb68965ce29d984UL, + 0x55724fdaf6a2b770UL, 0x29496d5cd753720eUL, 0xa75941573d3af204UL, 0x8e102c0bea69800aUL, + 0x111ab16bc573d049UL, 0xd7ffe439197aab8aUL, 0xefac380e0b5a09cdUL, 0x48f579593660fbc9UL, + 0x22347fd697e6bd92UL, 0x61bc1405e13389c7UL, 0x4ab5c975b9d9c1e1UL, 0x80cd1bcf606126d2UL, + 0x7186fd78ed92449aUL, 0x93971a882aabccb3UL, 0x88d0e17f66bfce72UL, 0x27945a985d5bd4d6UL + }, + { + 0xde553f8c05a811c8UL, 0x1906b59631b4f565UL, 0x436e70d6b1964ff7UL, 0x36d343cb8b1e9d85UL, + 0x843dfacc858aab5aUL, 0xfdfc95c299bfc7f9UL, 0x0f634bdea1d51fa2UL, 0x6d458b3b76efb3cdUL, + 0x85c3f77cf8593f80UL, 0x3c91315fbe737cb2UL, 0x2148b03366ace398UL, 0x18f8b8264c6761bfUL, + 0xc830c1c495c9fb0fUL, 0x981a76102086a0aaUL, 0xaa16012142f35760UL, 0x35cc54060c763cf6UL, + 0x42907d66cc45db2dUL, 0x8203d44b965af4bcUL, 0x3d6f3cefc3a0e868UL, 0xbc73ff69d292bda7UL, + 0x8722ed0102e20a29UL, 0x8f8185e8cd34deb7UL, 0x9b0561dda7ee01d9UL, 0x5335a0193227fad6UL, + 0xc9cecc74e81a6fd5UL, 0x54f5832e5c2431eaUL, 0x99e47ba05d553470UL, 0xf7bee756acd226ceUL, + 0x384e05a5571816fdUL, 0xd1367452a47d0e6aUL, 0xf29fde1c386ad85bUL, 0x320c77316275f7caUL, + 0xd0c879e2d9ae9ab0UL, 0xdb7406c69110ef5dUL, 0x45505e51a2461011UL, 0xfc029872e46c5323UL, + 0xfa3cb6f5f7bc0cc5UL, 0x031f17cd8768a173UL, 0xbd8df2d9af41297dUL, 0x9d3b4f5ab43e5e3fUL, + 0x4071671b36feee84UL, 0x716207e7d3e3b83dUL, 0x48d20ff2f9283a1aUL, 0x27769eb4757cbc7eUL, + 0x5c56ebc793f2e574UL, 0xa48b474f9ef5dc18UL, 0x52cbada94ff46e0cUL, 0x60c7da982d8199c6UL, + 0x0e9d466edc068b78UL, 0x4eec2175eaf865fcUL, 0x550b8e9e21f7a530UL, 0x6b7ba5bc653fec2bUL, + 0x5eb7f1ba6949d0ddUL, 0x57ea94e3db4c9099UL, 0xf640eae6d101b214UL, 0xdd4a284182c0b0bbUL, + 0xff1d8fbf6304f250UL, 0xb8accb933bf9d7e8UL, 0xe8867c478eb68c4dUL, 0x3f8e2692391bddc1UL, + 0xcb2fd60912a15a7cUL, 0xaec935dbab983d2fUL, 0xf55ffd2b56691367UL, 0x80e2ce366ce1c115UL, + 0x179bf3f8edb27e1dUL, 0x01fe0db07dd394daUL, 0xda8a0b76ecc37b87UL, 0x44ae53e1df9584cbUL, + 0xb310b4b77347a205UL, 0xdfab323c787b8512UL, 0x3b511268d070b78eUL, 0x65e6e3d2b9396753UL, + 0x6864b271e2574d58UL, 0x259784c98fc789d7UL, 0x02e11a7dfabb35a9UL, 0x8841a6dfa337158bUL, + 0x7ade78c39b5dcdd0UL, 0xb7cf804d9a2cc84aUL, 0x20b6bd831b7f7742UL, 0x75bd331d3a88d272UL, + 0x418f6aab4b2d7a5eUL, 0xd9951cbb6babdaf4UL, 0xb6318dfde7ff5c90UL, 0x1f389b112264aa83UL, + 0x492c024284fbaec0UL, 0xe33a0363c608f9a0UL, 0x2688930408af28a4UL, 0xc7538a1a341ce4adUL, + 0x5da8e677ee2171aeUL, 0x8c9e92254a5c7fc4UL, 0x63d8cd55aae938b5UL, 0x29ebd8daa97a3706UL, + 0x959827b37be88aa1UL, 0x1484e4356adadf6eUL, 0xa7945082199d7d6bUL, 0xbf6ce8a455fa1cd4UL, + 0x9cc542eac9edcae5UL, 0x79c16f0e1c356ca3UL, 0x89bfab6fdee48151UL, 0xd4174d1830c5f0ffUL, + 0x9258048415eb419dUL, 0x6139d72850520d1cUL, 0x6a85a80c18ec78f1UL, 0xcd11f88e0171059aUL, + 0xcceff53e7ca29140UL, 0xd229639f2315af19UL, 0x90b91ef9ef507434UL, 0x5977d28d074a1be1UL, + 0x311360fce51d56b9UL, 0xc093a92d5a1f2f91UL, 0x1a19a25bb6dc5416UL, 0xeb996b8a09de2d3eUL, + 0xfee3820f1ed7668aUL, 0xd7085ad5b7ad518cUL, 0x7fff41890fe53345UL, 0xec5948bd67dde602UL, + 0x2fd5f65dbaaa68e0UL, 0xa5754affe32648c2UL, 0xf8ddac880d07396cUL, 0x6fa491468c548664UL, + 0x0c7c5c1326bdbed1UL, 0x4a33158f03930fb3UL, 0x699abfc19f84d982UL, 0xe4fa2054a80b329cUL, + 0x6707f9af438252faUL, 0x08a368e9cfd6d49eUL, 0x47b1442c58fd25b8UL, 0xbbb3dc5ebc91769bUL, + 0x1665fe489061eac7UL, 0x33f27a811fa66310UL, 0x93a609346838d547UL, 0x30ed6d4c98cec263UL, + 0x1dd9816cd8df9f2aUL, 0x94662a03063b1e7bUL, 0x83fdd9fbeb896066UL, 0x7b207573e68e590aUL, + 0x5f49fc0a149a4407UL, 0x343259b671a5a82cUL, 0xfbc2bb458a6f981fUL, 0xc272b350a0a41a38UL, + 0x3aaf1fd8ada32354UL, 0x6cbb868b0b3c2717UL, 0xa2b569c88d2583feUL, 0xf180c9d1bf027928UL, + 0xaf37386bd64ba9f5UL, 0x12bacab2790a8088UL, 0x4c0d3b0810435055UL, 0xb2eeb9070e9436dfUL, + 0xc5b29067cea7d104UL, 0xdcb425f1ff132461UL, 0x4f122cc5972bf126UL, 0xac282fa651230886UL, + 0xe7e537992f6393efUL, 0xe61b3a2952b00735UL, 0x709c0a57ae302ce7UL, 0xe02514ae416058d3UL, + 0xc44c9dd7b37445deUL, 0x5a68c5408022ba92UL, 0x1c278cdca50c0bf0UL, 0x6e5a9cf6f18712beUL, + 0x86dce0b17f319ef3UL, 0x2d34ec2040115d49UL, 0x4bcd183f7e409b69UL, 0x2815d56ad4a9a3dcUL, + 0x24698979f2141d0dUL, 0x0000000000000000UL, 0x1ec696a15fb73e59UL, 0xd86b110b16784e2eUL, + 0x8e7f8858b0e74a6dUL, 0x063e2e8713d05fe6UL, 0xe2c40ed3bbdb6d7aUL, 0xb1f1aeca89fc97acUL, + 0xe1db191e3cb3cc09UL, 0x6418ee62c4eaf389UL, 0xc6ad87aa49cf7077UL, 0xd6f65765ca7ec556UL, + 0x9afb6c6dda3d9503UL, 0x7ce05644888d9236UL, 0x8d609f95378feb1eUL, 0x23a9aa4e9c17d631UL, + 0x6226c0e5d73aac6fUL, 0x56149953a69f0443UL, 0xeeb852c09d66d3abUL, 0x2b0ac2a753c102afUL, + 0x07c023376e03cb3cUL, 0x2ccae1903dc2c993UL, 0xd3d76e2f5ec63bc3UL, 0x9e2458973356ff4cUL, + 0xa66a5d32644ee9b1UL, 0x0a427294356de137UL, 0x783f62be61e6f879UL, 0x1344c70204d91452UL, + 0x5b96c8f0fdf12e48UL, 0xa90916ecc59bf613UL, 0xbe92e5142829880eUL, 0x727d102a548b194eUL, + 0x1be7afebcb0fc0ccUL, 0x3e702b2244c8491bUL, 0xd5e940a84d166425UL, 0x66f9f41f3e51c620UL, + 0xabe80c913f20c3baUL, 0xf07ec461c2d1edf2UL, 0xf361d3ac45b94c81UL, 0x0521394a94b8fe95UL, + 0xadd622162cf09c5cUL, 0xe97871f7f3651897UL, 0xf4a1f09b2bba87bdUL, 0x095d6559b2054044UL, + 0x0bbc7f2448be75edUL, 0x2af4cf172e129675UL, 0x157ae98517094bb4UL, 0x9fda55274e856b96UL, + 0x914713499283e0eeUL, 0xb952c623462a4332UL, 0x74433ead475b46a8UL, 0x8b5eb112245fb4f8UL, + 0xa34b6478f0f61724UL, 0x11a5dd7ffe6221fbUL, 0xc16da49d27ccbb4bUL, 0x76a224d0bde07301UL, + 0x8aa0bca2598c2022UL, 0x4df336b86d90c48fUL, 0xea67663a740db9e4UL, 0xef465f70e0b54771UL, + 0x39b008152acb8227UL, 0x7d1e5bf4f55e06ecUL, 0x105bd0cf83b1b521UL, 0x775c2960c033e7dbUL, + 0x7e014c397236a79fUL, 0x811cc386113255cfUL, 0xeda7450d1a0e72d8UL, 0x5889df3d7a998f3bUL, + 0x2e2bfbedc779fc3aUL, 0xce0eef438619a4e9UL, 0x372d4e7bf6cd095fUL, 0x04df34fae96b6a4fUL, + 0xf923a13870d4adb6UL, 0xa1aa7e050a4d228dUL, 0xa8f71b5cb84862c9UL, 0xb52e9a306097fde3UL, + 0x0d8251a35b6e2a0bUL, 0x2257a7fee1c442ebUL, 0x73831d9a29588d94UL, 0x51d4ba64c89ccf7fUL, + 0x502ab7d4b54f5ba5UL, 0x97793dce8153bf08UL, 0xe5042de4d5d8a646UL, 0x9687307efc802bd2UL, + 0xa05473b5779eb657UL, 0xb4d097801d446939UL, 0xcff0e2f3fbca3033UL, 0xc38cbee0dd778ee2UL, + 0x464f499c252eb162UL, 0xcad1dbb96f72cea6UL, 0xba4dd1eec142e241UL, 0xb00fa37af42f0376UL + }, + { + 0xcce4cd3aa968b245UL, 0x089d5484e80b7fafUL, 0x638246c1b3548304UL, 0xd2fe0ec8c2355492UL, + 0xa7fbdf7ff2374eeeUL, 0x4df1600c92337a16UL, 0x84e503ea523b12fbUL, 0x0790bbfd53ab0c4aUL, + 0x198a780f38f6ea9dUL, 0x2ab30c8f55ec48cbUL, 0xe0f7fed6b2c49db5UL, 0xb6ecf3f422cadbdcUL, + 0x409c9a541358df11UL, 0xd3ce8a56dfde3fe3UL, 0xc3e9224312c8c1a0UL, 0x0d6dfa58816ba507UL, + 0xddf3e1b179952777UL, 0x04c02a42748bb1d9UL, 0x94c2abff9f2decb8UL, 0x4f91752da8f8acf4UL, + 0x78682befb169bf7bUL, 0xe1c77a48af2ff6c4UL, 0x0c5d7ec69c80ce76UL, 0x4cc1e4928fd81167UL, + 0xfeed3d24d9997b62UL, 0x518bb6dfc3a54a23UL, 0x6dbf2d26151f9b90UL, 0xb5bc624b05ea664fUL, + 0xe86aaa525acfe21aUL, 0x4801ced0fb53a0beUL, 0xc91463e6c00868edUL, 0x1027a815cd16fe43UL, + 0xf67069a0319204cdUL, 0xb04ccc976c8abce7UL, 0xc0b9b3fc35e87c33UL, 0xf380c77c58f2de65UL, + 0x50bb3241de4e2152UL, 0xdf93f490435ef195UL, 0xf1e0d25d62390887UL, 0xaf668bfb1a3c3141UL, + 0xbc11b251f00a7291UL, 0x73a5eed47e427d47UL, 0x25bee3f6ee4c3b2eUL, 0x43cc0beb34786282UL, + 0xc824e778dde3039cUL, 0xf97d86d98a327728UL, 0xf2b043e24519b514UL, 0xe297ebf7880f4b57UL, + 0x3a94a49a98fab688UL, 0x868516cb68f0c419UL, 0xeffa11af0964ee50UL, 0xa4ab4ec0d517f37dUL, + 0xa9c6b498547c567aUL, 0x8e18424f80fbbbb6UL, 0x0bcdc53bcf2bc23cUL, 0x137739aaea3643d0UL, + 0x2c1333ec1bac2ff0UL, 0x8d48d3f0a7db0625UL, 0x1e1ac3f26b5de6d7UL, 0xf520f81f16b2b95eUL, + 0x9f0f6ec450062e84UL, 0x0130849e1deb6b71UL, 0xd45e31ab8c7533a9UL, 0x652279a2fd14e43fUL, + 0x3209f01e70f1c927UL, 0xbe71a770cac1a473UL, 0x0e3d6be7a64b1894UL, 0x7ec8148cff29d840UL, + 0xcb7476c7fac3be0fUL, 0x72956a4a63a91636UL, 0x37f95ec21991138fUL, 0x9e3fea5a4ded45f5UL, + 0x7b38ba50964902e8UL, 0x222e580bbde73764UL, 0x61e253e0899f55e6UL, 0xfc8d2805e352ad80UL, + 0x35994be3235ac56dUL, 0x09add01af5e014deUL, 0x5e8659a6780539c6UL, 0xb17c48097161d796UL, + 0x026015213acbd6e2UL, 0xd1ae9f77e515e901UL, 0xb7dc776a3f21b0adUL, 0xaba6a1b96eb78098UL, + 0x9bcf4486248d9f5dUL, 0x582666c536455efdUL, 0xfdbdac9bfeb9c6f1UL, 0xc47999be4163cdeaUL, + 0x765540081722a7efUL, 0x3e548ed8ec710751UL, 0x3d041f67cb51bac2UL, 0x7958af71ac82d40aUL, + 0x36c9da5c047a78feUL, 0xed9a048e33af38b2UL, 0x26ee7249c96c86bdUL, 0x900281bdeba65d61UL, + 0x11172c8bd0fd9532UL, 0xea0abf73600434f8UL, 0x42fc8f75299309f3UL, 0x34a9cf7d3eb1ae1cUL, + 0x2b838811480723baUL, 0x5ce64c8742ceef24UL, 0x1adae9b01fd6570eUL, 0x3c349bf9d6bad1b3UL, + 0x82453c891c7b75c0UL, 0x97923a40b80d512bUL, 0x4a61dbf1c198765cUL, 0xb48ce6d518010d3eUL, + 0xcfb45c858e480fd6UL, 0xd933cbf30d1e96aeUL, 0xd70ea014ab558e3aUL, 0xc189376228031742UL, + 0x9262949cd16d8b83UL, 0xeb3a3bed7def5f89UL, 0x49314a4ee6b8cbcfUL, 0xdcc3652f647e4c06UL, + 0xda635a4c2a3e2b3dUL, 0x470c21a940f3d35bUL, 0x315961a157d174b4UL, 0x6672e81dda3459acUL, + 0x5b76f77a1165e36eUL, 0x445cb01667d36ec8UL, 0xc5491d205c88a69bUL, 0x456c34887a3805b9UL, + 0xffddb9bac4721013UL, 0x99af51a71e4649bfUL, 0xa15be01cbc7729d5UL, 0x52db2760e485f7b0UL, + 0x8c78576eba306d54UL, 0xae560f6507d75a30UL, 0x95f22f6182c687c9UL, 0x71c5fbf54489aba5UL, + 0xca44f259e728d57eUL, 0x88b87d2ccebbdc8dUL, 0xbab18d32be4a15aaUL, 0x8be8ec93e99b611eUL, + 0x17b713e89ebdf209UL, 0xb31c5d284baa0174UL, 0xeeca9531148f8521UL, 0xb8d198138481c348UL, + 0x8988f9b2d350b7fcUL, 0xb9e11c8d996aa839UL, 0x5a4673e40c8e881fUL, 0x1687977683569978UL, + 0xbf4123eed72acf02UL, 0x4ea1f1b3b513c785UL, 0xe767452be16f91ffUL, 0x7505d1b730021a7cUL, + 0xa59bca5ec8fc980cUL, 0xad069eda20f7e7a3UL, 0x38f4b1bba231606aUL, 0x60d2d77e94743e97UL, + 0x9affc0183966f42cUL, 0x248e6768f3a7505fUL, 0xcdd449a4b483d934UL, 0x87b59255751baf68UL, + 0x1bea6d2e023d3c7fUL, 0x6b1f12455b5ffcabUL, 0x743555292de9710dUL, 0xd8034f6d10f5fddfUL, + 0xc6198c9f7ba81b08UL, 0xbb8109aca3a17edbUL, 0xfa2d1766ad12cabbUL, 0xc729080166437079UL, + 0x9c5fff7b77269317UL, 0x0000000000000000UL, 0x15d706c9a47624ebUL, 0x6fdf38072fd44d72UL, + 0x5fb6dd3865ee52b7UL, 0xa33bf53d86bcff37UL, 0xe657c1b5fc84fa8eUL, 0xaa962527735cebe9UL, + 0x39c43525bfda0b1bUL, 0x204e4d2a872ce186UL, 0x7a083ece8ba26999UL, 0x554b9c9db72efbfaUL, + 0xb22cd9b656416a05UL, 0x96a2bedea5e63a5aUL, 0x802529a826b0a322UL, 0x8115ad363b5bc853UL, + 0x8375b81701901eb1UL, 0x3069e53f4a3a1fc5UL, 0xbd2136cfede119e0UL, 0x18bafc91251d81ecUL, + 0x1d4a524d4c7d5b44UL, 0x05f0aedc6960daa8UL, 0x29e39d3072ccf558UL, 0x70f57f6b5962c0d4UL, + 0x989fd53903ad22ceUL, 0xf84d024797d91c59UL, 0x547b1803aac5908bUL, 0xf0d056c37fd263f6UL, + 0xd56eb535919e58d8UL, 0x1c7ad6d351963035UL, 0x2e7326cd2167f912UL, 0xac361a443d1c8cd2UL, + 0x697f076461942a49UL, 0x4b515f6fdc731d2dUL, 0x8ad8680df4700a6fUL, 0x41ac1eca0eb3b460UL, + 0x7d988533d80965d3UL, 0xa8f6300649973d0bUL, 0x7765c4960ac9cc9eUL, 0x7ca801adc5e20ea2UL, + 0xdea3700e5eb59ae4UL, 0xa06b6482a19c42a4UL, 0x6a2f96db46b497daUL, 0x27def6d7d487edccUL, + 0x463ca5375d18b82aUL, 0xa6cb5be1efdc259fUL, 0x53eba3fef96e9cc1UL, 0xce84d81b93a364a7UL, + 0xf4107c810b59d22fUL, 0x333974806d1aa256UL, 0x0f0def79bba073e5UL, 0x231edc95a00c5c15UL, + 0xe437d494c64f2c6cUL, 0x91320523f64d3610UL, 0x67426c83c7df32ddUL, 0x6eefbc99323f2603UL, + 0x9d6f7be56acdf866UL, 0x5916e25b2bae358cUL, 0x7ff89012e2c2b331UL, 0x035091bf2720bd93UL, + 0x561b0d22900e4669UL, 0x28d319ae6f279e29UL, 0x2f43a2533c8c9263UL, 0xd09e1be9f8fe8270UL, + 0xf740ed3e2c796fbcUL, 0xdb53ded237d5404cUL, 0x62b2c25faebfe875UL, 0x0afd41a5d2c0a94dUL, + 0x6412fd3ce0ff8f4eUL, 0xe3a76f6995e42026UL, 0x6c8fa9b808f4f0e1UL, 0xc2d9a6dd0f23aad1UL, + 0x8f28c6d19d10d0c7UL, 0x85d587744fd0798aUL, 0xa20b71a39b579446UL, 0x684f83fa7c7f4138UL, + 0xe507500adba4471dUL, 0x3f640a46f19a6c20UL, 0x1247bd34f7dd28a1UL, 0x2d23b77206474481UL, + 0x93521002cc86e0f2UL, 0x572b89bc8de52d18UL, 0xfb1d93f8b0f9a1caUL, 0xe95a2ecc4724896bUL, + 0x3ba420048511ddf9UL, 0xd63e248ab6bee54bUL, 0x5dd6c8195f258455UL, 0x06a03f634e40673bUL, + 0x1f2a476c76b68da6UL, 0x217ec9b49ac78af7UL, 0xecaa80102e4453c3UL, 0x14e78257b99d4f9aUL + }, + { + 0x20329b2cc87bba05UL, 0x4f5eb6f86546a531UL, 0xd4f44775f751b6b1UL, 0x8266a47b850dfa8bUL, + 0xbb986aa15a6ca985UL, 0xc979eb08f9ae0f99UL, 0x2da6f447a2375ea1UL, 0x1e74275dcd7d8576UL, + 0xbc20180a800bc5f8UL, 0xb4a2f701b2dc65beUL, 0xe726946f981b6d66UL, 0x48e6c453bf21c94cUL, + 0x42cad9930f0a4195UL, 0xefa47b64aacccd20UL, 0x71180a8960409a42UL, 0x8bb3329bf6a44e0cUL, + 0xd34c35de2d36daccUL, 0xa92f5b7cbc23dc96UL, 0xb31a85aa68bb09c3UL, 0x13e04836a73161d2UL, + 0xb24dfc4129c51d02UL, 0x8ae44b70b7da5acdUL, 0xe671ed84d96579a7UL, 0xa4bb3417d66f3832UL, + 0x4572ab38d56d2de8UL, 0xb1b47761ea47215cUL, 0xe81c09cf70aba15dUL, 0xffbdb872ce7f90acUL, + 0xa8782297fd5dc857UL, 0x0d946f6b6a4ce4a4UL, 0xe4df1f4f5b995138UL, 0x9ebc71edca8c5762UL, + 0x0a2c1dc0b02b88d9UL, 0x3b503c115d9d7b91UL, 0xc64376a8111ec3a2UL, 0xcec199a323c963e4UL, + 0xdc76a87ec58616f7UL, 0x09d596e073a9b487UL, 0x14583a9d7d560dafUL, 0xf4c6dc593f2a0cb4UL, + 0xdd21d19584f80236UL, 0x4a4836983ddde1d3UL, 0xe58866a41ae745f9UL, 0xf591a5b27e541875UL, + 0x891dc05074586693UL, 0x5b068c651810a89eUL, 0xa30346bc0c08544fUL, 0x3dbf3751c684032dUL, + 0x2a1e86ec785032dcUL, 0xf73f5779fca830eaUL, 0xb60c05ca30204d21UL, 0x0cc316802b32f065UL, + 0x8770241bdd96be69UL, 0xb861e18199ee95dbUL, 0xf805cad91418fcd1UL, 0x29e70dccbbd20e82UL, + 0xc7140f435060d763UL, 0x0f3a9da0e8b0cc3bUL, 0xa2543f574d76408eUL, 0xbd7761e1c175d139UL, + 0x4b1f4f737ca3f512UL, 0x6dc2df1f2fc137abUL, 0xf1d05c3967b14856UL, 0xa742bf3715ed046cUL, + 0x654030141d1697edUL, 0x07b872abda676c7dUL, 0x3ce84eba87fa17ecUL, 0xc1fb0403cb79afdfUL, + 0x3e46bc7105063f73UL, 0x278ae987121cd678UL, 0xa1adb4778ef47cd0UL, 0x26dd906c5362c2b9UL, + 0x05168060589b44e2UL, 0xfbfc41f9d79ac08fUL, 0x0e6de44ba9ced8faUL, 0x9feb08068bf243a3UL, + 0x7b341749d06b129bUL, 0x229c69e74a87929aUL, 0xe09ee6c4427c011bUL, 0x5692e30e725c4c3aUL, + 0xda99a33e5e9f6e4bUL, 0x353dd85af453a36bUL, 0x25241b4c90e0fee7UL, 0x5de987258309d022UL, + 0xe230140fc0802984UL, 0x93281e86a0c0b3c6UL, 0xf229d719a4337408UL, 0x6f6c2dd4ad3d1f34UL, + 0x8ea5b2fbae3f0aeeUL, 0x8331dd90c473ee4aUL, 0x346aa1b1b52db7aaUL, 0xdf8f235e06042aa9UL, + 0xcc6f6b68a1354b7bUL, 0x6c95a6f46ebf236aUL, 0x52d31a856bb91c19UL, 0x1a35ded6d498d555UL, + 0xf37eaef2e54d60c9UL, 0x72e181a9a3c2a61cUL, 0x98537aad51952fdeUL, 0x16f6c856ffaa2530UL, + 0xd960281e9d1d5215UL, 0x3a0745fa1ce36f50UL, 0x0b7b642bf1559c18UL, 0x59a87eae9aec8001UL, + 0x5e100c05408bec7cUL, 0x0441f98b19e55023UL, 0xd70dcc5534d38aefUL, 0x927f676de1bea707UL, + 0x9769e70db925e3e5UL, 0x7a636ea29115065aUL, 0x468b201816ef11b6UL, 0xab81a9b73edff409UL, + 0xc0ac7de88a07bb1eUL, 0x1f235eb68c0391b7UL, 0x6056b074458dd30fUL, 0xbe8eeac102f7ed67UL, + 0xcd381283e04b5fbaUL, 0x5cbefecec277c4e3UL, 0xd21b4c356c48ce0dUL, 0x1019c31664b35d8cUL, + 0x247362a7d19eea26UL, 0xebe582efb3299d03UL, 0x02aef2cb82fc289fUL, 0x86275df09ce8aaa8UL, + 0x28b07427faac1a43UL, 0x38a9b7319e1f47cfUL, 0xc82e92e3b8d01b58UL, 0x06ef0b409b1978bcUL, + 0x62f842bfc771fb90UL, 0x9904034610eb3b1fUL, 0xded85ab5477a3e68UL, 0x90d195a663428f98UL, + 0x5384636e2ac708d8UL, 0xcbd719c37b522706UL, 0xae9729d76644b0ebUL, 0x7c8c65e20a0c7ee6UL, + 0x80c856b007f1d214UL, 0x8c0b40302cc32271UL, 0xdbcedad51fe17a8aUL, 0x740e8ae938dbdea0UL, + 0xa615c6dc549310adUL, 0x19cc55f6171ae90bUL, 0x49b1bdb8fe5fdd8dUL, 0xed0a89af2830e5bfUL, + 0x6a7aadb4f5a65bd6UL, 0x7e22972988f05679UL, 0xf952b3325566e810UL, 0x39fecedadf61530eUL, + 0x6101c99f04f3c7ceUL, 0x2e5f7f6761b562ffUL, 0xf08725d226cf5c97UL, 0x63af3b54860fef51UL, + 0x8ff2cb10ef411e2fUL, 0x884ab9bb35267252UL, 0x4df04433e7ba8daeUL, 0x9afd8866d3690741UL, + 0x66b9bb34de94abb3UL, 0x9baaf18d92171380UL, 0x543c11c5f0a064a5UL, 0x17a1b1bdbed431f1UL, + 0xb5f58eeaf3a2717fUL, 0xc355f6c849858740UL, 0xec5df044694ef17eUL, 0xd83751f5dc6346d4UL, + 0xfc4433520dfdacf2UL, 0x0000000000000000UL, 0x5a51f58e596ebc5fUL, 0x3285aaf12e34cf16UL, + 0x8d5c39db6dbd36b0UL, 0x12b731dde64f7513UL, 0x94906c2d7aa7dfbbUL, 0x302b583aacc8e789UL, + 0x9d45facd090e6b3cUL, 0x2165e2c78905aec4UL, 0x68d45f7f775a7349UL, 0x189b2c1d5664fdcaUL, + 0xe1c99f2f030215daUL, 0x6983269436246788UL, 0x8489af3b1e148237UL, 0xe94b702431d5b59cUL, + 0x33d2d31a6f4adbd7UL, 0xbfd9932a4389f9a6UL, 0xb0e30e8aab39359dUL, 0xd1e2c715afcaf253UL, + 0x150f43763c28196eUL, 0xc4ed846393e2eb3dUL, 0x03f98b20c3823c5eUL, 0xfd134ab94c83b833UL, + 0x556b682eb1de7064UL, 0x36c4537a37d19f35UL, 0x7559f30279a5ca61UL, 0x799ae58252973a04UL, + 0x9c12832648707ffdUL, 0x78cd9c6913e92ec5UL, 0x1d8dac7d0effb928UL, 0x439da0784e745554UL, + 0x413352b3cc887dcbUL, 0xbacf134a1b12bd44UL, 0x114ebafd25cd494dUL, 0x2f08068c20cb763eUL, + 0x76a07822ba27f63fUL, 0xeab2fb04f25789c2UL, 0xe3676de481fe3d45UL, 0x1b62a73d95e6c194UL, + 0x641749ff5c68832cUL, 0xa5ec4dfc97112cf3UL, 0xf6682e92bdd6242bUL, 0x3f11c59a44782bb2UL, + 0x317c21d1edb6f348UL, 0xd65ab5be75ad9e2eUL, 0x6b2dd45fb4d84f17UL, 0xfaab381296e4d44eUL, + 0xd0b5befeeeb4e692UL, 0x0882ef0b32d7a046UL, 0x512a91a5a83b2047UL, 0x963e9ee6f85bf724UL, + 0x4e09cf132438b1f0UL, 0x77f701c9fb59e2feUL, 0x7ddb1c094b726a27UL, 0x5f4775ee01f5f8bdUL, + 0x9186ec4d223c9b59UL, 0xfeeac1998f01846dUL, 0xac39db1ce4b89874UL, 0xb75b7c21715e59e0UL, + 0xafc0503c273aa42aUL, 0x6e3b543fec430bf5UL, 0x704f7362213e8e83UL, 0x58ff0745db9294c0UL, + 0x67eec2df9feabf72UL, 0xa0facd9ccf8a6811UL, 0xb936986ad890811aUL, 0x95c715c63bd9cb7aUL, + 0xca8060283a2c33c7UL, 0x507de84ee9453486UL, 0x85ded6d05f6a96f6UL, 0x1cdad5964f81ade9UL, + 0xd5a33e9eb62fa270UL, 0x40642b588df6690aUL, 0x7f75eec2c98e42b8UL, 0x2cf18dace3494a60UL, + 0x23cb100c0bf9865bUL, 0xeef3028febb2d9e1UL, 0x4425d2d394133929UL, 0xaad6d05c7fa1e0c8UL, + 0xad6ea2f7a5c68cb5UL, 0xc2028f2308fb9381UL, 0x819f2f5b468fc6d5UL, 0xc5bafd88d29cfffcUL, + 0x47dc59f357910577UL, 0x2b49ff07392e261dUL, 0x57c59ae5332258fbUL, 0x73b6f842e2bcb2ddUL, + 0xcf96e04862b77725UL, 0x4ca73dd8a6c4996fUL, 0x015779eb417e14c1UL, 0x37932a9176af8bf4UL + }, + { + 0x190a2c9b249df23eUL, 0x2f62f8b62263e1e9UL, 0x7a7f754740993655UL, 0x330b7ba4d5564d9fUL, + 0x4c17a16a46672582UL, 0xb22f08eb7d05f5b8UL, 0x535f47f40bc148ccUL, 0x3aec5d27d4883037UL, + 0x10ed0a1825438f96UL, 0x516101f72c233d17UL, 0x13cc6f949fd04eaeUL, 0x739853c441474bfdUL, + 0x653793d90d3f5b1bUL, 0x5240647b96b0fc2fUL, 0x0c84890ad27623e0UL, 0xd7189b32703aaea3UL, + 0x2685de3523bd9c41UL, 0x99317c5b11bffefaUL, 0x0d9baa854f079703UL, 0x70b93648fbd48ac5UL, + 0xa80441fce30bc6beUL, 0x7287704bdc36ff1eUL, 0xb65384ed33dc1f13UL, 0xd36417343ee34408UL, + 0x39cd38ab6e1bf10fUL, 0x5ab861770a1f3564UL, 0x0ebacf09f594563bUL, 0xd04572b884708530UL, + 0x3cae9722bdb3af47UL, 0x4a556b6f2f5cbaf2UL, 0xe1704f1f76c4bd74UL, 0x5ec4ed7144c6dfcfUL, + 0x16afc01d4c7810e6UL, 0x283f113cd629ca7aUL, 0xaf59a8761741ed2dUL, 0xeed5a3991e215facUL, + 0x3bf37ea849f984d4UL, 0xe413e096a56ce33cUL, 0x2c439d3a98f020d1UL, 0x637559dc6404c46bUL, + 0x9e6c95d1e5f5d569UL, 0x24bb9836045fe99aUL, 0x44efa466dac8ecc9UL, 0xc6eab2a5c80895d6UL, + 0x803b50c035220cc4UL, 0x0321658cba93c138UL, 0x8f9ebc465dc7ee1cUL, 0xd15a5137190131d3UL, + 0x0fa5ec8668e5e2d8UL, 0x91c979578d1037b1UL, 0x0642ca05693b9f70UL, 0xefca80168350eb4fUL, + 0x38d21b24f36a45ecUL, 0xbeab81e1af73d658UL, 0x8cbfd9cae7542f24UL, 0xfd19cc0d81f11102UL, + 0x0ac6430fbb4dbc90UL, 0x1d76a09d6a441895UL, 0x2a01573ff1cbbfa1UL, 0xb572e161894fde2bUL, + 0x8124734fa853b827UL, 0x614b1fdf43e6b1b0UL, 0x68ac395c4238cc18UL, 0x21d837bfd7f7b7d2UL, + 0x20c714304a860331UL, 0x5cfaab726324aa14UL, 0x74c5ba4eb50d606eUL, 0xf3a3030474654739UL, + 0x23e671bcf015c209UL, 0x45f087e947b9582aUL, 0xd8bd77b418df4c7bUL, 0xe06f6c90ebb50997UL, + 0x0bd96080263c0873UL, 0x7e03f9410e40dcfeUL, 0xb8e94be4c6484928UL, 0xfb5b0608e8ca8e72UL, + 0x1a2b49179e0e3306UL, 0x4e29e76961855059UL, 0x4f36c4e6fcf4e4baUL, 0x49740ee395cf7bcaUL, + 0xc2963ea386d17f7dUL, 0x90d65ad810618352UL, 0x12d34c1b02a1fa4dUL, 0xfa44258775bb3a91UL, + 0x18150f14b9ec46ddUL, 0x1491861e6b9a653dUL, 0x9a1019d7ab2c3fc2UL, 0x3668d42d06fe13d7UL, + 0xdcc1fbb25606a6d0UL, 0x969490dd795a1c22UL, 0x3549b1a1bc6dd2efUL, 0xc94f5e23a0ed770eUL, + 0xb9f6686b5b39fdcbUL, 0xc4d4f4a6efeae00dUL, 0xe732851a1fff2204UL, 0x94aad6de5eb869f9UL, + 0x3f8ff2ae07206e7fUL, 0xfe38a9813b62d03aUL, 0xa7a1ad7a8bee2466UL, 0x7b6056c8dde882b6UL, + 0x302a1e286fc58ca7UL, 0x8da0fa457a259bc7UL, 0xb3302b64e074415bUL, 0x5402ae7eff8b635fUL, + 0x08f8050c9cafc94bUL, 0xae468bf98a3059ceUL, 0x88c355cca98dc58fUL, 0xb10e6d67c7963480UL, + 0xbad70de7e1aa3cf3UL, 0xbfb4a26e320262bbUL, 0xcb711820870f02d5UL, 0xce12b7a954a75c9dUL, + 0x563ce87dd8691684UL, 0x9f73b65e7884618aUL, 0x2b1e74b06cba0b42UL, 0x47cec1ea605b2df1UL, + 0x1c698312f735ac76UL, 0x5fdbcefed9b76b2cUL, 0x831a354c8fb1cdfcUL, 0x820516c312c0791fUL, + 0xb74ca762aeadabf0UL, 0xfc06ef821c80a5e1UL, 0x5723cbf24518a267UL, 0x9d4df05d5f661451UL, + 0x588627742dfd40bfUL, 0xda8331b73f3d39a0UL, 0x17b0e392d109a405UL, 0xf965400bcf28fba9UL, + 0x7c3dbf4229a2a925UL, 0x023e460327e275dbUL, 0x6cd0b55a0ce126b3UL, 0xe62da695828e96e7UL, + 0x42ad6e63b3f373b9UL, 0xe50cc319381d57dfUL, 0xc5cbd729729b54eeUL, 0x46d1e265fd2a9912UL, + 0x6428b056904eeff8UL, 0x8be23040131e04b7UL, 0x6709d5da2add2ec0UL, 0x075de98af44a2b93UL, + 0x8447dcc67bfbe66fUL, 0x6616f655b7ac9a23UL, 0xd607b8bded4b1a40UL, 0x0563af89d3a85e48UL, + 0x3db1b4ad20c21ba4UL, 0x11f22997b8323b75UL, 0x292032b34b587e99UL, 0x7f1cdace9331681dUL, + 0x8e819fc9c0b65affUL, 0xa1e3677fe2d5bb16UL, 0xcd33d225ee349da5UL, 0xd9a2543b85aef898UL, + 0x795e10cbfa0af76dUL, 0x25a4bbb9992e5d79UL, 0x78413344677b438eUL, 0xf0826688cef68601UL, + 0xd27b34bba392f0ebUL, 0x551d8df162fad7bcUL, 0x1e57c511d0d7d9adUL, 0xdeffbdb171e4d30bUL, + 0xf4feea8e802f6caaUL, 0xa480c8f6317de55eUL, 0xa0fc44f07fa40ff5UL, 0x95b5f551c3c9dd1aUL, + 0x22f952336d6476eaUL, 0x0000000000000000UL, 0xa6be8ef5169f9085UL, 0xcc2cf1aa73452946UL, + 0x2e7ddb39bf12550aUL, 0xd526dd3157d8db78UL, 0x486b2d6c08becf29UL, 0x9b0f3a58365d8b21UL, + 0xac78cdfaadd22c15UL, 0xbc95c7e28891a383UL, 0x6a927f5f65dab9c3UL, 0xc3891d2c1ba0cb9eUL, + 0xeaa92f9f50f8b507UL, 0xcf0d9426c9d6e87eUL, 0xca6e3baf1a7eb636UL, 0xab25247059980786UL, + 0x69b31ad3df4978fbUL, 0xe2512a93cc577c4cUL, 0xff278a0ea61364d9UL, 0x71a615c766a53e26UL, + 0x89dc764334fc716cUL, 0xf87a638452594f4aUL, 0xf2bc208be914f3daUL, 0x8766b94ac1682757UL, + 0xbbc82e687cdb8810UL, 0x626a7a53f9757088UL, 0xa2c202f358467a2eUL, 0x4d0882e5db169161UL, + 0x09e7268301de7da8UL, 0xe897699c771ac0dcUL, 0xc8507dac3d9cc3edUL, 0xc0a878a0a1330aa6UL, + 0x978bb352e42ba8c1UL, 0xe9884a13ea6b743fUL, 0x279afdbabecc28a2UL, 0x047c8c064ed9eaabUL, + 0x507e2278b15289f4UL, 0x599904fbb08cf45cUL, 0xbd8ae46d15e01760UL, 0x31353da7f2b43844UL, + 0x8558ff49e68a528cUL, 0x76fbfc4d92ef15b5UL, 0x3456922e211c660cUL, 0x86799ac55c1993b4UL, + 0x3e90d1219a51da9cUL, 0x2d5cbeb505819432UL, 0x982e5fd48cce4a19UL, 0xdb9c1238a24c8d43UL, + 0xd439febecaa96f9bUL, 0x418c0bef0960b281UL, 0x158ea591f6ebd1deUL, 0x1f48e69e4da66d4eUL, + 0x8afd13cf8e6fb054UL, 0xf5e1c9011d5ed849UL, 0xe34e091c5126c8afUL, 0xad67ee7530a398f6UL, + 0x43b24dec2e82c75aUL, 0x75da99c1287cd48dUL, 0x92e81cdb3783f689UL, 0xa3dd217cc537cecdUL, + 0x60543c50de970553UL, 0x93f73f54aaf2426aUL, 0xa91b62737e7a725dUL, 0xf19d4507538732e2UL, + 0x77e4dfc20f9ea156UL, 0x7d229ccdb4d31dc6UL, 0x1b346a98037f87e5UL, 0xedf4c615a4b29e94UL, + 0x4093286094110662UL, 0xb0114ee85ae78063UL, 0x6ff1d0d6b672e78bUL, 0x6dcf96d591909250UL, + 0xdfe09e3eec9567e8UL, 0x3214582b4827f97cUL, 0xb46dc2ee143e6ac8UL, 0xf6c0ac8da7cd1971UL, + 0xebb60c10cd8901e4UL, 0xf7df8f023abcad92UL, 0x9c52d3d2c217a0b2UL, 0x6b8d5cd0f8ab0d20UL, + 0x3777f7a29b8fa734UL, 0x011f238f9d71b4e3UL, 0xc1b75b2f3c42be45UL, 0x5de588fdfe551ef7UL, + 0x6eeef3592b035368UL, 0xaa3a07ffc4e9b365UL, 0xecebe59a39c32a77UL, 0x5ba742f8976e8187UL, + 0x4b4a48e0b22d0e11UL, 0xddded83dcb771233UL, 0xa59feb79ac0c51bdUL, 0xc7f5912a55792135UL + }, + { + 0x6d6ae04668a9b08aUL, 0x3ab3f04b0be8c743UL, 0xe51e166b54b3c908UL, 0xbe90a9eb35c2f139UL, + 0xb2c7066637f2bec1UL, 0xaa6945613392202cUL, 0x9a28c36f3b5201ebUL, 0xddce5a93ab536994UL, + 0x0e34133ef6382827UL, 0x52a02ba1ec55048bUL, 0xa2f88f97c4b2a177UL, 0x8640e513ca2251a5UL, + 0xcdf1d36258137622UL, 0xfe6cb708dedf8ddbUL, 0x8a174a9ec8121e5dUL, 0x679896036b81560eUL, + 0x59ed033395795feeUL, 0x1dd778ab8b74edafUL, 0xee533ef92d9f926dUL, 0x2a8c79baf8a8d8f5UL, + 0x6bcf398e69b119f6UL, 0xe20491742fafdd95UL, 0x276488e0809c2aecUL, 0xea955b82d88f5cceUL, + 0x7102c63a99d9e0c4UL, 0xf9763017a5c39946UL, 0x429fa2501f151b3dUL, 0x4659c72bea05d59eUL, + 0x984b7fdccf5a6634UL, 0xf742232953fbb161UL, 0x3041860e08c021c7UL, 0x747bfd9616cd9386UL, + 0x4bb1367192312787UL, 0x1b72a1638a6c44d3UL, 0x4a0e68a6e8359a66UL, 0x169a5039f258b6caUL, + 0xb98a2ef44edee5a4UL, 0xd9083fe85e43a737UL, 0x967f6ce239624e13UL, 0x8874f62d3c1a7982UL, + 0x3c1629830af06e3fUL, 0x9165ebfd427e5a8eUL, 0xb5dd81794ceeaa5cUL, 0x0de8f15a7834f219UL, + 0x70bd98ede3dd5d25UL, 0xaccc9ca9328a8950UL, 0x56664eda1945ca28UL, 0x221db34c0f8859aeUL, + 0x26dbd637fa98970dUL, 0x1acdffb4f068f932UL, 0x4585254f64090fa0UL, 0x72de245e17d53afaUL, + 0x1546b25d7c546cf4UL, 0x207e0ffffb803e71UL, 0xfaaad2732bcf4378UL, 0xb462dfae36ea17bdUL, + 0xcf926fd1ac1b11fdUL, 0xe0672dc7dba7ba4aUL, 0xd3fa49ad5d6b41b3UL, 0x8ba81449b216a3bcUL, + 0x14f9ec8a0650d115UL, 0x40fc1ee3eb1d7ce2UL, 0x23a2ed9b758ce44fUL, 0x782c521b14fddc7eUL, + 0x1c68267cf170504eUL, 0xbcf31558c1ca96e6UL, 0xa781b43b4ba6d235UL, 0xf6fd7dfe29ff0c80UL, + 0xb0a4bad5c3fad91eUL, 0xd199f51ea963266cUL, 0x414340349119c103UL, 0x5405f269ed4dadf7UL, + 0xabd61bb649969dcdUL, 0x6813dbeae7bdc3c8UL, 0x65fb2ab09f8931d1UL, 0xf1e7fae152e3181dUL, + 0xc1a67cef5a2339daUL, 0x7a4feea8e0f5bba1UL, 0x1e0b9acf05783791UL, 0x5b8ebf8061713831UL, + 0x80e53cdbcb3af8d9UL, 0x7e898bd315e57502UL, 0xc6bcfbf0213f2d47UL, 0x95a38e86b76e942dUL, + 0x092e94218d243cbaUL, 0x8339debf453622e7UL, 0xb11be402b9fe64ffUL, 0x57d9100d634177c9UL, + 0xcc4e8db52217cbc3UL, 0x3b0cae9c71ec7aa2UL, 0xfb158ca451cbfe99UL, 0x2b33276d82ac6514UL, + 0x01bf5ed77a04bde1UL, 0xc5601994af33f779UL, 0x75c4a3416cc92e67UL, 0xf3844652a6eb7fc2UL, + 0x3487e375fdd0ef64UL, 0x18ae430704609eedUL, 0x4d14efb993298efbUL, 0x815a620cb13e4538UL, + 0x125c354207487869UL, 0x9eeea614ce42cf48UL, 0xce2d3106d61fac1cUL, 0xbbe99247bad6827bUL, + 0x071a871f7b1c149dUL, 0x2e4a1cc10db81656UL, 0x77a71ff298c149b8UL, 0x06a5d9c80118a97cUL, + 0xad73c27e488e34b1UL, 0x443a7b981e0db241UL, 0xe3bbcfa355ab6074UL, 0x0af276450328e684UL, + 0x73617a896dd1871bUL, 0x58525de4ef7de20fUL, 0xb7be3dcab8e6cd83UL, 0x19111dd07e64230cUL, + 0x842359a03e2a367aUL, 0x103f89f1f3401fb6UL, 0xdc710444d157d475UL, 0xb835702334da5845UL, + 0x4320fc876511a6dcUL, 0xd026abc9d3679b8dUL, 0x17250eee885c0b2bUL, 0x90dab52a387ae76fUL, + 0x31fed8d972c49c26UL, 0x89cba8fa461ec463UL, 0x2ff5421677bcabb7UL, 0x396f122f85e41d7dUL, + 0xa09b332430bac6a8UL, 0xc888e8ced7070560UL, 0xaeaf201ac682ee8fUL, 0x1180d7268944a257UL, + 0xf058a43628e7a5fcUL, 0xbd4c4b8fbbce2b07UL, 0xa1246df34abe7b49UL, 0x7d5569b79be9af3cUL, + 0xa9b5a705bd9efa12UL, 0xdb6b835baa4bc0e8UL, 0x05793bac8f147342UL, 0x21c1512881848390UL, + 0xfdb0556c50d357e5UL, 0x613d4fcb6a99ff72UL, 0x03dce2648e0cda3eUL, 0xe949b9e6568386f0UL, + 0xfc0f0bbb2ad7ea04UL, 0x6a70675913b5a417UL, 0x7f36d5046fe1c8e3UL, 0x0c57af8d02304ff8UL, + 0x32223abdfcc84618UL, 0x0891caf6f720815bUL, 0xa63eeaec31a26fd4UL, 0x2507345374944d33UL, + 0x49d28ac266394058UL, 0xf5219f9aa7f3d6beUL, 0x2d96fea583b4cc68UL, 0x5a31e1571b7585d0UL, + 0x8ed12fe53d02d0feUL, 0xdfade6205f5b0e4bUL, 0x4cabb16ee92d331aUL, 0x04c6657bf510cea3UL, + 0xd73c2cd6a87b8f10UL, 0xe1d87310a1a307abUL, 0x6cd5be9112ad0d6bUL, 0x97c032354366f3f2UL, + 0xd4e0ceb22677552eUL, 0x0000000000000000UL, 0x29509bde76a402cbUL, 0xc27a9e8bd42fe3e4UL, + 0x5ef7842cee654b73UL, 0xaf107ecdbc86536eUL, 0x3fcacbe784fcb401UL, 0xd55f90655c73e8cfUL, + 0xe6c2f40fdabf1336UL, 0xe8f6e7312c873b11UL, 0xeb2a0555a28be12fUL, 0xe4a148bc2eb774e9UL, + 0x9b979db84156bc0aUL, 0x6eb60222e6a56ab4UL, 0x87ffbbc4b026ec44UL, 0xc703a5275b3b90a6UL, + 0x47e699fc9001687fUL, 0x9c8d1aa73a4aa897UL, 0x7cea3760e1ed12ddUL, 0x4ec80ddd1d2554c5UL, + 0x13e36b957d4cc588UL, 0x5d2b66486069914dUL, 0x92b90999cc7280b0UL, 0x517cc9c56259deb5UL, + 0xc937b619ad03b881UL, 0xec30824ad997f5b2UL, 0xa45d565fc5aa080bUL, 0xd6837201d27f32f1UL, + 0x635ef3789e9198adUL, 0x531f75769651b96aUL, 0x4f77530a6721e924UL, 0x486dd4151c3dfdb9UL, + 0x5f48dafb9461f692UL, 0x375b011173dc355aUL, 0x3da9775470f4d3deUL, 0x8d0dcd81b30e0ac0UL, + 0x36e45fc609d888bbUL, 0x55baacbe97491016UL, 0x8cb29356c90ab721UL, 0x76184125e2c5f459UL, + 0x99f4210bb55edbd5UL, 0x6f095cf59ca1d755UL, 0x9f51f8c3b44672a9UL, 0x3538bda287d45285UL, + 0x50c39712185d6354UL, 0xf23b1885dcefc223UL, 0x79930ccc6ef9619fUL, 0xed8fdc9da3934853UL, + 0xcb540aaa590bdf5eUL, 0x5c94389f1a6d2cacUL, 0xe77daad8a0bbaed7UL, 0x28efc5090ca0bf2aUL, + 0xbf2ff73c4fc64cd8UL, 0xb37858b14df60320UL, 0xf8c96ec0dfc724a7UL, 0x828680683f329f06UL, + 0x941cd051cd6a29ccUL, 0xc3c5c05cae2b5e05UL, 0xb601631dc2e27062UL, 0xc01922382027843bUL, + 0x24b86a840e90f0d2UL, 0xd245177a276ffc52UL, 0x0f8b4de98c3c95c6UL, 0x3e759530fef809e0UL, + 0x0b4d2892792c5b65UL, 0xc4df4743d5374a98UL, 0xa5e20888bfaeb5eaUL, 0xba56cc90c0d23f9aUL, + 0x38d04cf8ffe0a09cUL, 0x62e1adafe495254cUL, 0x0263bcb3f40867dfUL, 0xcaeb547d230f62bfUL, + 0x6082111c109d4293UL, 0xdad4dd8cd04f7d09UL, 0xefec602e579b2f8cUL, 0x1fb4c4187f7c8a70UL, + 0xffd3e9dfa4db303aUL, 0x7bf0b07f9af10640UL, 0xf49ec14dddf76b5fUL, 0x8f6e713247066d1fUL, + 0x339d646a86ccfbf9UL, 0x64447467e58d8c30UL, 0x2c29a072f9b07189UL, 0xd8b7613f24471ad6UL, + 0x6627c8d41185ebefUL, 0xa347d140beb61c96UL, 0xde12b8f7255fb3aaUL, 0x9d324470404e1576UL, + 0x9306574eb6763d51UL, 0xa80af9d2c79a47f3UL, 0x859c0777442e8b9bUL, 0x69ac853d9db97e29UL + }, + { + 0xc3407dfc2de6377eUL, 0x5b9e93eea4256f77UL, 0xadb58fdd50c845e0UL, 0x5219ff11a75bed86UL, + 0x356b61cfd90b1de9UL, 0xfb8f406e25abe037UL, 0x7a5a0231c0f60796UL, 0x9d3cd216e1f5020bUL, + 0x0c6550fb6b48d8f3UL, 0xf57508c427ff1c62UL, 0x4ad35ffa71cb407dUL, 0x6290a2da1666aa6dUL, + 0xe284ec2349355f9fUL, 0xb3c307c53d7c84ecUL, 0x05e23c0468365a02UL, 0x190bac4d6c9ebfa8UL, + 0x94bbbee9e28b80faUL, 0xa34fc777529cb9b5UL, 0xcc7b39f095bcd978UL, 0x2426addb0ce532e3UL, + 0x7e79329312ce4fc7UL, 0xab09a72eebec2917UL, 0xf8d15499f6b9d6c2UL, 0x1a55b8babf8c895dUL, + 0xdb8add17fb769a85UL, 0xb57f2f368658e81bUL, 0x8acd36f18f3f41f6UL, 0x5ce3b7bba50f11d3UL, + 0x114dcc14d5ee2f0aUL, 0xb91a7fcded1030e8UL, 0x81d5425fe55de7a1UL, 0xb6213bc1554adeeeUL, + 0x80144ef95f53f5f2UL, 0x1e7688186db4c10cUL, 0x3b912965db5fe1bcUL, 0xc281715a97e8252dUL, + 0x54a5d7e21c7f8171UL, 0x4b12535ccbc5522eUL, 0x1d289cefbea6f7f9UL, 0x6ef5f2217d2e729eUL, + 0xe6a7dc819b0d17ceUL, 0x1b94b41c05829b0eUL, 0x33d7493c622f711eUL, 0xdcf7f942fa5ce421UL, + 0x600fba8b7f7a8ecbUL, 0x46b60f011a83988eUL, 0x235b898e0dcf4c47UL, 0x957ab24f588592a9UL, + 0x4354330572b5c28cUL, 0xa5f3ef84e9b8d542UL, 0x8c711e02341b2d01UL, 0x0b1874ae6a62a657UL, + 0x1213d8e306fc19ffUL, 0xfe6d7c6a4d9dba35UL, 0x65ed868f174cd4c9UL, 0x88522ea0e6236550UL, + 0x899322065c2d7703UL, 0xc01e690bfef4018bUL, 0x915982ed8abddaf8UL, 0xbe675b98ec3a4e4cUL, + 0xa996bf7f82f00db1UL, 0xe1daf8d49a27696aUL, 0x2effd5d3dc8986e7UL, 0xd153a51f2b1a2e81UL, + 0x18caa0ebd690adfbUL, 0x390e3134b243c51aUL, 0x2778b92cdff70416UL, 0x029f1851691c24a6UL, + 0x5e7cafeacc133575UL, 0xfa4e4cc89fa5f264UL, 0x5a5f9f481e2b7d24UL, 0x484c47ab18d764dbUL, + 0x400a27f2a1a7f479UL, 0xaeeb9b2a83da7315UL, 0x721c626879869734UL, 0x042330a2d2384851UL, + 0x85f672fd3765aff0UL, 0xba446b3a3e02061dUL, 0x73dd6ecec3888567UL, 0xffac70ccf793a866UL, + 0xdfa9edb5294ed2d4UL, 0x6c6aea7014325638UL, 0x834a5a0e8c41c307UL, 0xcdba35562fb2cb2bUL, + 0x0ad97808d06cb404UL, 0x0f3b440cb85aee06UL, 0xe5f9c876481f213bUL, 0x98deee1289c35809UL, + 0x59018bbfcd394bd1UL, 0xe01bf47220297b39UL, 0xde68e1139340c087UL, 0x9fa3ca4788e926adUL, + 0xbb85679c840c144eUL, 0x53d8f3b71d55ffd5UL, 0x0da45c5dd146caa0UL, 0x6f34fe87c72060cdUL, + 0x57fbc315cf6db784UL, 0xcee421a1fca0fddeUL, 0x3d2d0196607b8d4bUL, 0x642c8a29ad42c69aUL, + 0x14aff010bdd87508UL, 0xac74837beac657b3UL, 0x3216459ad821634dUL, 0x3fb219c70967a9edUL, + 0x06bc28f3bb246cf7UL, 0xf2082c9126d562c6UL, 0x66b39278c45ee23cUL, 0xbd394f6f3f2878b9UL, + 0xfd33689d9e8f8cc0UL, 0x37f4799eb017394fUL, 0x108cc0b26fe03d59UL, 0xda4bd1b1417888d6UL, + 0xb09d1332ee6eb219UL, 0x2f3ed975668794b4UL, 0x58c0871977375982UL, 0x7561463d78ace990UL, + 0x09876cff037e82f1UL, 0x7fb83e35a8c05d94UL, 0x26b9b58a65f91645UL, 0xef20b07e9873953fUL, + 0x3148516d0b3355b8UL, 0x41cb2b541ba9e62aUL, 0x790416c613e43163UL, 0xa011d380818e8f40UL, + 0x3a5025c36151f3efUL, 0xd57095bdf92266d0UL, 0x498d4b0da2d97688UL, 0x8b0c3a57353153a5UL, + 0x21c491df64d368e1UL, 0x8f2f0af5e7091bf4UL, 0x2da1c1240f9bb012UL, 0xc43d59a92ccc49daUL, + 0xbfa6573e56345c1fUL, 0x828b56a8364fd154UL, 0x9a41f643e0df7cafUL, 0xbcf843c985266aeaUL, + 0x2b1de9d7b4bfdce5UL, 0x20059d79dedd7ab2UL, 0x6dabe6d6ae3c446bUL, 0x45e81bf6c991ae7bUL, + 0x6351ae7cac68b83eUL, 0xa432e32253b6c711UL, 0xd092a9b991143cd2UL, 0xcac711032e98b58fUL, + 0xd8d4c9e02864ac70UL, 0xc5fc550f96c25b89UL, 0xd7ef8dec903e4276UL, 0x67729ede7e50f06fUL, + 0xeac28c7af045cf3dUL, 0xb15c1f945460a04aUL, 0x9cfddeb05bfb1058UL, 0x93c69abce3a1fe5eUL, + 0xeb0380dc4a4bdd6eUL, 0xd20db1e8f8081874UL, 0x229a8528b7c15e14UL, 0x44291750739fbc28UL, + 0xd3ccbd4e42060a27UL, 0xf62b1c33f4ed2a97UL, 0x86a8660ae4779905UL, 0xd62e814a2a305025UL, + 0x477703a7a08d8addUL, 0x7b9b0e977af815c5UL, 0x78c51a60a9ea2330UL, 0xa6adfb733aaae3b7UL, + 0x97e5aa1e3199b60fUL, 0x0000000000000000UL, 0xf4b404629df10e31UL, 0x5564db44a6719322UL, + 0x9207961a59afec0dUL, 0x9624a6b88b97a45cUL, 0x363575380a192b1cUL, 0x2c60cd82b595a241UL, + 0x7d272664c1dc7932UL, 0x7142769faa94a1c1UL, 0xa1d0df263b809d13UL, 0x1630e841d4c451aeUL, + 0xc1df65ad44fa13d8UL, 0x13d2d445bcf20bacUL, 0xd915c546926abe23UL, 0x38cf3d92084dd749UL, + 0xe766d0272103059dUL, 0xc7634d5effde7f2fUL, 0x077d2455012a7ea4UL, 0xedbfa82ff16fb199UL, + 0xaf2a978c39d46146UL, 0x42953fa3c8bbd0dfUL, 0xcb061da59496a7dcUL, 0x25e7a17db6eb20b0UL, + 0x34aa6d6963050fbaUL, 0xa76cf7d580a4f1e4UL, 0xf7ea10954ee338c4UL, 0xfcf2643b24819e93UL, + 0xcf252d0746aeef8dUL, 0x4ef06f58a3f3082cUL, 0x563acfb37563a5d7UL, 0x5086e740ce47c920UL, + 0x2982f186dda3f843UL, 0x87696aac5e798b56UL, 0x5d22bb1d1f010380UL, 0x035e14f7d31236f5UL, + 0x3cec0d30da759f18UL, 0xf3c920379cdb7095UL, 0xb8db736b571e22bbUL, 0xdd36f5e44052f672UL, + 0xaac8ab8851e23b44UL, 0xa857b3d938fe1fe2UL, 0x17f1e4e76eca43fdUL, 0xec7ea4894b61a3caUL, + 0x9e62c6e132e734feUL, 0xd4b1991b432c7483UL, 0x6ad6c283af163acfUL, 0x1ce9904904a8e5aaUL, + 0x5fbda34c761d2726UL, 0xf910583f4cb7c491UL, 0xc6a241f845d06d7cUL, 0x4f3163fe19fd1a7fUL, + 0xe99c988d2357f9c8UL, 0x8eee06535d0709a7UL, 0x0efa48aa0254fc55UL, 0xb4be23903c56fa48UL, + 0x763f52caabbedf65UL, 0xeee1bcd8227d876cUL, 0xe345e085f33b4dccUL, 0x3e731561b369bbbeUL, + 0x2843fd2067adea10UL, 0x2adce5710eb1ceb6UL, 0xb7e03767ef44ccbdUL, 0x8db012a48e153f52UL, + 0x61ceb62dc5749c98UL, 0xe85d942b9959eb9bUL, 0x4c6f7709caef2c8aUL, 0x84377e5b8d6bbda3UL, + 0x30895dcbb13d47ebUL, 0x74a04a9bc2a2fbc3UL, 0x6b17ce251518289cUL, 0xe438c4d0f2113368UL, + 0x1fb784bed7bad35fUL, 0x9b80fae55ad16efcUL, 0x77fe5e6c11b0cd36UL, 0xc858095247849129UL, + 0x08466059b97090a2UL, 0x01c10ca6ba0e1253UL, 0x6988d6747c040c3aUL, 0x6849dad2c60a1e69UL, + 0x5147ebe67449db73UL, 0xc99905f4fd8a837aUL, 0x991fe2b433cd4a5aUL, 0xf09734c04fc94660UL, + 0xa28ecbd1e892abe6UL, 0xf1563866f5c75433UL, 0x4dae7baf70e13ed9UL, 0x7ce62ac27bd26b61UL, + 0x70837a39109ab392UL, 0x90988e4b30b3c8abUL, 0xb2020b63877296bfUL, 0x156efcb607d6675bUL + }, + { + 0xe63f55ce97c331d0UL, 0x25b506b0015bba16UL, 0xc8706e29e6ad9ba8UL, 0x5b43d3775d521f6aUL, + 0x0bfa3d577035106eUL, 0xab95fc172afb0e66UL, 0xf64b63979e7a3276UL, 0xf58b4562649dad4bUL, + 0x48f7c3dbae0c83f1UL, 0xff31916642f5c8c5UL, 0xcbb048dc1c4a0495UL, 0x66b8f83cdf622989UL, + 0x35c130e908e2b9b0UL, 0x7c761a61f0b34fa1UL, 0x3601161cf205268dUL, 0x9e54ccfe2219b7d6UL, + 0x8b7d90a538940837UL, 0x9cd403588ea35d0bUL, 0xbc3c6fea9ccc5b5aUL, 0xe5ff733b6d24aeedUL, + 0xceed22de0f7eb8d2UL, 0xec8581cab1ab545eUL, 0xb96105e88ff8e71dUL, 0x8ca03501871a5eadUL, + 0x76ccce65d6db2a2fUL, 0x5883f582a7b58057UL, 0x3f7be4ed2e8adc3eUL, 0x0fe7be06355cd9c9UL, + 0xee054e6c1d11be83UL, 0x1074365909b903a6UL, 0x5dde9f80b4813c10UL, 0x4a770c7d02b6692cUL, + 0x5379c8d5d7809039UL, 0xb4067448161ed409UL, 0x5f5e5026183bd6cdUL, 0xe898029bf4c29df9UL, + 0x7fb63c940a54d09cUL, 0xc5171f897f4ba8bcUL, 0xa6f28db7b31d3d72UL, 0x2e4f3be7716eaa78UL, + 0x0d6771a099e63314UL, 0x82076254e41bf284UL, 0x2f0fd2b42733df98UL, 0x5c9e76d3e2dc49f0UL, + 0x7aeb569619606cdbUL, 0x83478b07b2468764UL, 0xcfadcb8d5923cd32UL, 0x85dac7f05b95a41eUL, + 0xb5469d1b4043a1e9UL, 0xb821ecbbd9a592fdUL, 0x1b8e0b0e798c13c8UL, 0x62a57b6d9a0be02eUL, + 0xfcf1b793b81257f8UL, 0x9d94ea0bd8fe28ebUL, 0x4cea408aeb654a56UL, 0x23284a47e888996cUL, + 0x2d8f1d128b893545UL, 0xf4cbac3132c0d8abUL, 0xbd7c86b9ca912ebaUL, 0x3a268eef3dbe6079UL, + 0xf0d62f6077a9110cUL, 0x2735c916ade150cbUL, 0x89fd5f03942ee2eaUL, 0x1acee25d2fd16628UL, + 0x90f39bab41181bffUL, 0x430dfe8cde39939fUL, 0xf70b8ac4c8274796UL, 0x1c53aeaac6024552UL, + 0x13b410acf35e9c9bUL, 0xa532ab4249faa24fUL, 0x2b1251e5625a163fUL, 0xd7e3e676da4841c7UL, + 0xa7b264e4e5404892UL, 0xda8497d643ae72d3UL, 0x861ae105a1723b23UL, 0x38a6414991048aa4UL, + 0x6578dec92585b6b4UL, 0x0280cfa6acbaeaddUL, 0x88bdb650c273970aUL, 0x9333bd5ebbff84c2UL, + 0x4e6a8f2c47dfa08bUL, 0x321c954db76cef2aUL, 0x418d312a72837942UL, 0xb29b38bfffcdf773UL, + 0x6c022c38f90a4c07UL, 0x5a033a240b0f6a8aUL, 0x1f93885f3ce5da6fUL, 0xc38a537e96988bc6UL, + 0x39e6a81ac759ff44UL, 0x29929e43cee0fce2UL, 0x40cdd87924de0ca2UL, 0xe9d8ebc8a29fe819UL, + 0x0c2798f3cfbb46f4UL, 0x55e484223e53b343UL, 0x4650948ecd0d2fd8UL, 0x20e86cb2126f0651UL, + 0x6d42c56baf5739e7UL, 0xa06fc1405ace1e08UL, 0x7babbfc54f3d193bUL, 0x424d17df8864e67fUL, + 0xd8045870ef14980eUL, 0xc6d7397c85ac3781UL, 0x21a885e1443273b1UL, 0x67f8116f893f5c69UL, + 0x24f5efe35706cff6UL, 0xd56329d076f2ab1aUL, 0x5e1eb9754e66a32dUL, 0x28d2771098bd8902UL, + 0x8f6013f47dfdc190UL, 0x17a993fdb637553cUL, 0xe0a219397e1012aaUL, 0x786b9930b5da8606UL, + 0x6e82e39e55b0a6daUL, 0x875a0856f72f4ec3UL, 0x3741ff4fa458536dUL, 0xac4859b3957558fcUL, + 0x7ef6d5c75c09a57cUL, 0xc04a758b6c7f14fbUL, 0xf9acdd91ab26ebbfUL, 0x7391a467c5ef9668UL, + 0x335c7c1ee1319acaUL, 0xa91533b18641e4bbUL, 0xe4bf9a683b79db0dUL, 0x8e20faa72ba0b470UL, + 0x51f907737b3a7ae4UL, 0x2268a314bed5ec8cUL, 0xd944b123b949edeeUL, 0x31dcb3b84d8b7017UL, + 0xd3fe65279f218860UL, 0x097af2f1dc8ffab3UL, 0x9b09a6fc312d0b91UL, 0xcc6ded78a3c4520fUL, + 0x3481d9ba5ebfcc50UL, 0x4f2a667f1182d56bUL, 0xdfd9fdd4509ace94UL, 0x26752045fbbc252bUL, + 0xbffc491f662bc467UL, 0xdd593272fc202449UL, 0x3cbbc218d46d4303UL, 0x91b372f817456e1fUL, + 0x681faf69bc6385a0UL, 0xb686bbeebaa43ed4UL, 0x1469b5084cd0ca01UL, 0x98c98009cbca94acUL, + 0x6438379a73d8c354UL, 0xc2caba2dc0c5fe26UL, 0x3e3b0dbe78d7a9deUL, 0x50b9ee202d670f04UL, + 0x4590b27b37eab0e5UL, 0x6025b4cb36b10af3UL, 0xfb2c1237079c0162UL, 0xa12f28130c936be8UL, + 0x4b37e52e54eb1cccUL, 0x083a1ba28ad28f53UL, 0xc10a9cd83a22611bUL, 0x9f1425ad7444c236UL, + 0x069d4cf7e9d3237aUL, 0xedc56899e7f621beUL, 0x778c273680865fcfUL, 0x309c5aeb1bd605f7UL, + 0x8de0dc52d1472b4dUL, 0xf8ec34c2fd7b9e5fUL, 0xea18cd3d58787724UL, 0xaad515447ca67b86UL, + 0x9989695a9d97e14cUL, 0x0000000000000000UL, 0xf196c63321f464ecUL, 0x71116bc169557cb5UL, + 0xaf887f466f92c7c1UL, 0x972e3e0ffe964d65UL, 0x190ec4a8d536f915UL, 0x95aef1a9522ca7b8UL, + 0xdc19db21aa7d51a9UL, 0x94ee18fa0471d258UL, 0x8087adf248a11859UL, 0xc457f6da2916dd5cUL, + 0xfa6cfb6451c17482UL, 0xf256e0c6db13fbd1UL, 0x6a9f60cf10d96f7dUL, 0x4daaa9d9bd383fb6UL, + 0x03c026f5fae79f3dUL, 0xde99148706c7bb74UL, 0x2a52b8b6340763dfUL, 0x6fc20acd03edd33aUL, + 0xd423c08320afdefaUL, 0xbbe1ca4e23420dc0UL, 0x966ed75ca8cb3885UL, 0xeb58246e0e2502c4UL, + 0x055d6a021334bc47UL, 0xa47242111fa7d7afUL, 0xe3623fcc84f78d97UL, 0x81c744a11efc6db9UL, + 0xaec8961539cfb221UL, 0xf31609958d4e8e31UL, 0x63e5923ecc5695ceUL, 0x47107ddd9b505a38UL, + 0xa3afe7b5a0298135UL, 0x792b7063e387f3e6UL, 0x0140e953565d75e0UL, 0x12f4f9ffa503e97bUL, + 0x750ce8902c3cb512UL, 0xdbc47e8515f30733UL, 0x1ed3610c6ab8af8fUL, 0x5239218681dde5d9UL, + 0xe222d69fd2aaf877UL, 0xfe71783514a8bd25UL, 0xcaf0a18f4a177175UL, 0x61655d9860ec7f13UL, + 0xe77fbc9dc19e4430UL, 0x2ccff441ddd440a5UL, 0x16e97aaee06a20dcUL, 0xa855dae2d01c915bUL, + 0x1d1347f9905f30b2UL, 0xb7c652bdecf94b34UL, 0xd03e43d265c6175dUL, 0xfdb15ec0ee4f2218UL, + 0x57644b8492e9599eUL, 0x07dda5a4bf8e569aUL, 0x54a46d71680ec6a3UL, 0x5624a2d7c4b42c7eUL, + 0xbebca04c3076b187UL, 0x7d36f332a6ee3a41UL, 0x3b6667bc6be31599UL, 0x695f463aea3ef040UL, + 0xad08b0e0c3282d1cUL, 0xb15b1e4a052a684eUL, 0x44d05b2861b7c505UL, 0x15295c5b1a8dbfe1UL, + 0x744c01c37a61c0f2UL, 0x59c31cd1f1e8f5b7UL, 0xef45a73f4b4ccb63UL, 0x6bdf899c46841a9dUL, + 0x3dfb2b4b823036e3UL, 0xa2ef0ee6f674f4d5UL, 0x184e2dfb836b8cf5UL, 0x1134df0a5fe47646UL, + 0xbaa1231d751f7820UL, 0xd17eaa81339b62bdUL, 0xb01bf71953771daeUL, 0x849a2ea30dc8d1feUL, + 0x705182923f080955UL, 0x0ea757556301ac29UL, 0x041d83514569c9a7UL, 0x0abad4042668658eUL, + 0x49b72a88f851f611UL, 0x8a3d79f66ec97dd7UL, 0xcd2d042bf59927efUL, 0xc930877ab0f0ee48UL, + 0x9273540deda2f122UL, 0xc797d02fd3f14261UL, 0xe1e2f06a284d674aUL, 0xd2be8c74c97cfd80UL, + 0x9a494faf67707e71UL, 0xb3dbd1eca9908293UL, 0x72d14d3493b2e388UL, 0xd6a30f258c153427UL + } +}; + +inline void +GOST34112012Init(GOST34112012Context *CTX, const uint digest_size) +{ + CTX->buffer.VWORD = 0; +#if STREEBOG256CRYPT + CTX->h.VWORD = 0x01010101U; +#elif STREEBOG512CRYPT + CTX->h.VWORD = 0; +#else + CTX->digest_size = digest_size; + + if (digest_size == 256) + CTX->h.VWORD = 0x01010101U; + else + CTX->h.VWORD = 0; +#endif + CTX->N.VWORD = 0; + CTX->Sigma.VWORD = 0; + CTX->bufsize = 0; +} + +inline void +pad(GOST34112012Context *CTX) +{ + if (CTX->bufsize > 63) + return; + + memset_p(CTX->buffer.BYTES + CTX->bufsize, 0, sizeof(CTX->buffer.BYTES) - CTX->bufsize); + + CTX->buffer.BYTES[CTX->bufsize] = 0x01; +} + +/* Let r = x + y modulo 2^512 */ +inline void +add512(const uint512_u *x, const uint512_u *y, uint512_u *r) +{ + uint CF; + int i; + + CF = 0; +#if STREEBOG_UNROLL +#pragma unroll 8 +#endif + for (i = 0; i < 8; i++) { + const ulong left = x->QWORD[i]; + ulong sum; + + sum = left + y->QWORD[i] + CF; + if (sum != left) + CF = (sum < left); + r->QWORD[i] = sum; + } +} + +inline void +g(uint512_u *h, const uint512_u *N, const uint512_u *m, __local localbuf *loc_buf) +{ + uint512_u Ki, data; + + XLPS(h, N, &data); + + /* Starting E() */ + Ki = data; + XLPS(&Ki, m, &data); + +#if STREEBOG_UNROLL +#if STREEBOG_MANUAL_UNROLL + ROUND(0, &Ki, &data); + ROUND(1, &Ki, &data); + ROUND(2, &Ki, &data); + ROUND(3, &Ki, &data); + ROUND(4, &Ki, &data); + ROUND(5, &Ki, &data); + ROUND(6, &Ki, &data); + ROUND(7, &Ki, &data); + ROUND(8, &Ki, &data); + ROUND(9, &Ki, &data); + ROUND(10, &Ki, &data); +#else +#pragma unroll 11 +#endif +#endif +#if !STREEBOG_MANUAL_UNROLL + for (uint i = 0; i < 11; i++) + ROUND(i, &Ki, &data); +#endif + XLPS(&Ki, &C[11], &Ki); + XOR512(&Ki, &data, &data); + /* E() done */ + + XOR512(&data, h, &data); + XOR512(&data, m, h); +} + +// Special case of the above where N is all zeros +inline void +g0(uint512_u *h, const uint512_u *m, __local localbuf *loc_buf) +{ + uint512_u Ki, data; + + XLPS0(h, &data); + + /* Starting E() */ + Ki = data; + XLPS(&Ki, m, &data); + +#if STREEBOG_UNROLL +#if STREEBOG_MANUAL_UNROLL + ROUND(0, &Ki, &data); + ROUND(1, &Ki, &data); + ROUND(2, &Ki, &data); + ROUND(3, &Ki, &data); + ROUND(4, &Ki, &data); + ROUND(5, &Ki, &data); + ROUND(6, &Ki, &data); + ROUND(7, &Ki, &data); + ROUND(8, &Ki, &data); + ROUND(9, &Ki, &data); + ROUND(10, &Ki, &data); +#else +#pragma unroll 11 +#endif +#endif +#if !STREEBOG_MANUAL_UNROLL + for (uint i = 0; i < 11; i++) + ROUND(i, &Ki, &data); +#endif + + XLPS(&Ki, &C[11], &Ki); + XOR512(&Ki, &data, &data); + /* E() done */ + + XOR512(&data, h, &data); + XOR512(&data, m, h); +} + +inline void +stage2(GOST34112012Context *CTX, const uint512_u *data, __local localbuf *loc_buf) +{ + const uint512_u buffer512 = {{ 0x0000000000000200UL, 0, 0, 0, 0, 0, 0, 0 }}; + + uint512_u m; + + memcpy512(&m, data); + + g(&(CTX->h), &(CTX->N), &m, loc_buf); + + add512(&(CTX->N), &buffer512, &(CTX->N)); + add512(&(CTX->Sigma), &m, &(CTX->Sigma)); +} + +inline void +stage2d(GOST34112012Context *CTX, const uchar *data, __local localbuf *loc_buf) +{ + const uint512_u buffer512 = {{ 0x0000000000000200UL, 0, 0, 0, 0, 0, 0, 0 }}; + + uint512_u m; + + /* May be unaligned */ + memcpy_pp(m.BYTES, data, sizeof(m)); + + g(&(CTX->h), &(CTX->N), &m, loc_buf); + + add512(&(CTX->N), &buffer512, &(CTX->N)); + add512(&(CTX->Sigma), &m, &(CTX->Sigma)); +} + +inline void +stage3(GOST34112012Context *CTX, __local localbuf *loc_buf) +{ + uint512_u buf = {{ 0 }}; + + buf.QWORD[0] = CTX->bufsize << 3; + + pad(CTX); + + g(&(CTX->h), &(CTX->N), &(CTX->buffer), loc_buf); + + add512(&(CTX->N), &buf, &(CTX->N)); + add512(&(CTX->Sigma), &(CTX->buffer), &(CTX->Sigma)); + + g0(&(CTX->h), &(CTX->N), loc_buf); + g0(&(CTX->h), &(CTX->Sigma), loc_buf); +} + +inline void +GOST34112012Update(GOST34112012Context *CTX, const uchar *data, uint len, __local localbuf *loc_buf) +{ + if (CTX->bufsize) { + uint chunksize = MIN(len, 64 - CTX->bufsize); + + memcpy_pp(&CTX->buffer.BYTES[CTX->bufsize], data, chunksize); + + CTX->bufsize += chunksize; + len -= chunksize; + data += chunksize; + + if (CTX->bufsize == 64) { + stage2(CTX, &(CTX->buffer), loc_buf); + CTX->bufsize = 0; + } + } + + while (len > 63) { + stage2d(CTX, data, loc_buf); + + data += 64; + len -= 64; + } + + if (len) { + memcpy_pp(&CTX->buffer.BYTES, data, len); + CTX->bufsize = len; + } +} + +inline void +GOST34112012Final(GOST34112012Context *CTX, uint512_u *digest, __local localbuf *loc_buf) +{ + stage3(CTX, loc_buf); + + CTX->bufsize = 0; + +#if STREEBOG512CRYPT + memcpy512(digest, &(CTX->h)); +#elif STREEBOG256CRYPT + for (uint i = 0; i < 8; i++) + digest->DWORD[i] = CTX->h.DWORD[8 + i]; +#else + if (CTX->digest_size == 256) + for (uint i = 0; i < 8; i++) + digest->DWORD[i] = CTX->h.DWORD[8 + i]; + else + memcpy512(digest, &(CTX->h)); +#endif +} + +#endif /* _OPENCL_STREEBOG_H */ diff --git a/src/gost12256hash_fmt_plug.c b/src/gost12256hash_fmt_plug.c new file mode 100644 index 0000000000..41f861b4f9 --- /dev/null +++ b/src/gost12256hash_fmt_plug.c @@ -0,0 +1,441 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_gost12256hash; +#elif FMT_REGISTERS_H +john_register_one(&fmt_gost12256hash); +#else + +#define _GNU_SOURCE 1 +#include + +#ifdef _OPENMP +#include +#endif + +#include "arch.h" +#include "gost3411-2012-core.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "johnswap.h" + +#ifndef OMP_SCALE +#define OMP_SCALE 1 // This and MKPC tuned for core i7 +#endif + +#define FORMAT_LABEL "streebog256crypt" +#define FORMAT_NAME "Astra Linux $gost12256hash$" + +#if !JOHN_NO_SIMD && __AVX__ +#define ALGORITHM_NAME "GOST R 34.11-2012 128/128 AVX 1x" +#elif !JOHN_NO_SIMD && __SSE2__ +#define ALGORITHM_NAME "GOST R 34.11-2012 128/128 SSE2 1x" +#else +#define ALGORITHM_NAME "GOST R 34.11-2012 64/" ARCH_BITS_STR +#endif + +#define PLAINTEXT_LENGTH 125 + +#define SALT_SIZE sizeof(struct saltstruct) + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define BLKS MIN_KEYS_PER_CRYPT + +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. */ +#define ROUNDS_MIN 1 /* Drepper has it as 1000 */ +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define CIPHERTEXT_LENGTH 43 + +#define BINARY_SIZE 32 +#define BINARY_ALIGN 4 +#define SALT_LENGTH 16 +#define SALT_ALIGN 4 +#define FORMAT_TAG "$gost12256hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static int (*saved_len); +static char (*saved_key)[PLAINTEXT_LENGTH + 1]; +static uint32_t (*crypt_out)[BINARY_SIZE / sizeof(uint32_t)]; + +static struct saltstruct { + unsigned int len; + unsigned int rounds; + unsigned char salt[SALT_LENGTH]; +} *cur_salt; + +static struct fmt_tests tests[] = { + {"$gost12256hash$password$awrQfwgXMa0BFMCtZu97GJKqeVszI/B2usmTf9cpOa/", "magnum"}, + {"$gost12256hash$longersalt$KP7Eyt1XM83PbW3jtvOAtsQUQUf0EKZBP0UqFds7AU7", "longerpassword"}, + {"$gost12256hash$eVszI/B2usmT$gGPHrK8MAsv/KLAcLhSXZES5OdI9dMFQONmIpUDNzi5", "password"}, + {NULL} +}; + +/* ------- Check if the ciphertext if a valid gost12256hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main * self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void * get_binary(char * ciphertext) { + static uint32_t outbuf[BINARY_SIZE/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i=0; + + do { + TO_BINARY(i, (i+10)%30, (i+20)%30); + i = (i+21)%30; + } while (i != 0); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12); + out[31] = value >> 8; + out[30] = value; + return (void *)out; +} + +static void init(struct fmt_main *self) +{ + omp_autotune(self, OMP_SCALE); + + saved_len = mem_calloc(self->params.max_keys_per_crypt, sizeof(*saved_len)); + saved_key = mem_calloc(self->params.max_keys_per_crypt, sizeof(*saved_key)); + crypt_out = mem_calloc(self->params.max_keys_per_crypt, sizeof(*crypt_out)); +} + +static void done(void) +{ + MEM_FREE(crypt_out); + MEM_FREE(saved_key); + MEM_FREE(saved_len); +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +static void set_key(char *key, int index) +{ + saved_len[index] = strnzcpyn(saved_key[index], key, sizeof(*saved_key)); +} + +static char *get_key(int index) +{ + saved_key[index][saved_len[index]] = 0; + return saved_key[index]; +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + int index; + +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (index = 0; index < count; index++) { + unsigned char temp_result[BINARY_SIZE]; + GOST34112012Context ctx; + GOST34112012Context alt_ctx; + size_t cnt; + unsigned char *cp; + unsigned char p_bytes[PLAINTEXT_LENGTH + 1]; + unsigned char s_bytes[PLAINTEXT_LENGTH + 1]; + + /* Prepare for the real work. */ + GOST34112012Init(&ctx, 256); + + /* Add the key string. */ + GOST34112012Update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + GOST34112012Update(&ctx, cur_salt->salt, cur_salt->len); + + + /* Compute alternate GOST sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + GOST34112012Init(&alt_ctx, 256); + + /* Add key. */ + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Add salt. */ + GOST34112012Update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Add key again. */ + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Now get result of this (32 bytes) and add it to the other context. */ + GOST34112012Final(&alt_ctx, (unsigned char*)crypt_out[index]); + + /* Add for any character in the key one byte of the alternate sum. */ + for (cnt = saved_len[index]; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], cnt); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = saved_len[index]; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + GOST34112012Update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, (unsigned char*)crypt_out[index]); + + /* Start computation of P byte sequence. */ + GOST34112012Init(&alt_ctx, 256); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < saved_len[index]; ++cnt) + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, temp_result); + + /* Create byte sequence P. */ + cp = p_bytes; + for (cnt = saved_len[index]; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = (unsigned char*)memcpy(cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy(cp, temp_result, cnt); + + /* Start computation of S byte sequence. */ + GOST34112012Init(&alt_ctx, 256); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < 16 + ((unsigned char*)crypt_out[index])[0]; ++cnt) + GOST34112012Update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, temp_result); + + /* Create byte sequence S. */ + cp = s_bytes; + for (cnt = cur_salt->len; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = (unsigned char*)memcpy(cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy(cp, temp_result, cnt); + + /* Repeatedly run the collected hash value through GOST to burn CPU cycles. */ + for (cnt = 0; cnt < cur_salt->rounds; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 256); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + else + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3 != 0) + GOST34112012Update(&ctx, s_bytes, cur_salt->len); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7 != 0) + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, (unsigned char*)crypt_out[index]); + } + } + + return count; +} + +static void set_salt(void *salt) +{ + cur_salt = salt; +} + +static void *get_salt(char *ciphertext) +{ + static struct saltstruct out; + int len; + + memset(&out, 0, sizeof(out)); + out.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + out.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(out.salt, ciphertext, len); + out.len = len; + return &out; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index], ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index], BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static unsigned int iteration_count(void *salt) +{ + struct saltstruct *gost12256hash_salt; + + gost12256hash_salt = salt; + return (unsigned int)gost12256hash_salt->rounds; +} + +// We are hashing the entire struct +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +struct fmt_main fmt_gost12256hash = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT | FMT_OMP, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + fmt_default_reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ diff --git a/src/gost12512hash_fmt_plug.c b/src/gost12512hash_fmt_plug.c new file mode 100644 index 0000000000..801d04e3dd --- /dev/null +++ b/src/gost12512hash_fmt_plug.c @@ -0,0 +1,442 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_gost12512hash; +#elif FMT_REGISTERS_H +john_register_one(&fmt_gost12512hash); +#else + +#define _GNU_SOURCE 1 +#include + +#ifdef _OPENMP +#include +#endif + +#include "arch.h" +#include "gost3411-2012-core.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "johnswap.h" + +#ifndef OMP_SCALE +#define OMP_SCALE 4 // This and MKPC tuned for core i7 +#endif + +#define FORMAT_LABEL "streebog512crypt" +#define FORMAT_NAME "Astra Linux $gost12512hash$" + +#if !JOHN_NO_SIMD && __AVX__ +#define ALGORITHM_NAME "GOST R 34.11-2012 128/128 AVX 1x" +#elif !JOHN_NO_SIMD && __SSE2__ +#define ALGORITHM_NAME "GOST R 34.11-2012 128/128 SSE2 1x" +#else +#define ALGORITHM_NAME "GOST R 34.11-2012 64/" ARCH_BITS_STR +#endif + +#define PLAINTEXT_LENGTH 125 + +#define BINARY_ALIGN 4 +#define SALT_SIZE sizeof(struct saltstruct) +#define SALT_ALIGN 4 + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define BINARY_SIZE 64 +#define SALT_LENGTH 16 +#define CIPHERTEXT_LENGTH 86 + +/* ------ Contains (at least) prepare(), valid() and split() ------ */ +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. */ +#define ROUNDS_MIN 1 /* Drepper has it as 1000 */ +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define FORMAT_TAG "$gost12512hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static struct fmt_tests tests[] = { + {"$gost12512hash$salt$nwv1Oqm2zL9523APlKI6Fpmlm56UzSe.J1wDzMk61S2zxa1nWY4U9d3oP5UyOpZQvTzVPEZWRnfRd/yJZeXp2.", "magnum"}, + {"$gost12512hash$longersalt$Ru..ARXlil3CeBqecyE9EBW1tfZZoPPnnwWJKy7nbhOnzmXpldy3nt1iB1QM21KD73L1tm/zWsiyU05zHluwO1", "longerpassword"}, + {"$gost12512hash$KrmcHbeQmxiP/Ipl$uNmEQvpJ7B0jTcCkIh1IQEIRoyAXzliyt2S7VAz/xA8GNjBbFeE.c98elVBwad6y40qPNKYqYNkWZJFxpGw620", "password"}, + {NULL} +}; + +/* ------- Check if the ciphertext if a valid gost12512hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main * self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext || pos > &ciphertext[SALT_LENGTH]) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) + return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void *get_binary(char * ciphertext) { + static uint32_t outbuf[64/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i = 0; + + do { + TO_BINARY(i, (i+21)%63, (i+42)%63); + i = (i+22)%63; + } while (i != 21); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6); + out[63] = value; + return (void *)out; +} + +static int (*saved_len); +static char (*saved_key)[PLAINTEXT_LENGTH + 1]; +static uint32_t (*crypt_out)[BINARY_SIZE / sizeof(uint32_t)]; + +static struct saltstruct { + unsigned int len; + unsigned int rounds; + unsigned char salt[SALT_LENGTH]; +} *cur_salt; + +static void init(struct fmt_main *self) +{ + omp_autotune(self, OMP_SCALE); + + // we allocate 1 more than needed, and use that 'extra' value as a zero + // length PW to fill in the tail groups in MMX mode. + saved_len = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*saved_len)); + saved_key = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*saved_key)); + crypt_out = mem_calloc(1 + self->params.max_keys_per_crypt, sizeof(*crypt_out)); +} + +static void done(void) +{ + MEM_FREE(crypt_out); + MEM_FREE(saved_key); + MEM_FREE(saved_len); +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +static void set_key(char *key, int index) +{ + saved_len[index] = strnzcpyn(saved_key[index], key, sizeof(*saved_key)); +} + +static char *get_key(int index) +{ + saved_key[index][saved_len[index]] = 0; + return saved_key[index]; +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + int index; +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (index = 0; index < count; index += MIN_KEYS_PER_CRYPT) { + unsigned char temp_result[BINARY_SIZE]; + GOST34112012Context ctx; + GOST34112012Context alt_ctx; + size_t cnt; + unsigned char *cp; + unsigned char p_bytes[PLAINTEXT_LENGTH+1]; + unsigned char s_bytes[PLAINTEXT_LENGTH+1]; + + /* Prepare for the real work. */ + GOST34112012Init(&ctx, 512); + + /* Add the key string. */ + GOST34112012Update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + GOST34112012Update(&ctx, cur_salt->salt, cur_salt->len); + + + /* Compute alternate Streebog sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + GOST34112012Init(&alt_ctx, 512); + + /* Add key. */ + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Add salt. */ + GOST34112012Update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Add key again. */ + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Now get result of this (64 bytes) and add it to the other + context. */ + GOST34112012Final(&alt_ctx, (unsigned char*)crypt_out[index]); + + /* Add for any character in the key one byte of the alternate sum. */ + for (cnt = saved_len[index]; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], cnt); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = saved_len[index]; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + GOST34112012Update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, (unsigned char*)crypt_out[index]); + + /* Start computation of P byte sequence. */ + GOST34112012Init(&alt_ctx, 512); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < saved_len[index]; ++cnt) + GOST34112012Update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, temp_result); + + /* Create byte sequence P. */ + cp = p_bytes; + for (cnt = saved_len[index]; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = memcpy (cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy (cp, temp_result, cnt); + + /* Start computation of S byte sequence. */ + GOST34112012Init(&alt_ctx, 512); + + /* repeast the following 16+A[0] times, where A[0] represents the + first byte in digest A interpreted as an 8-bit unsigned value */ + for (cnt = 0; cnt < 16 + ((unsigned char*)crypt_out[index])[0]; ++cnt) + GOST34112012Update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Finish the digest. */ + GOST34112012Final(&alt_ctx, temp_result); + + /* Create byte sequence S. */ + cp = s_bytes; + for (cnt = cur_salt->len; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = memcpy (cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy (cp, temp_result, cnt); + + /* Repeatedly run the collected hash value through Streebog to burn CPU cycles. */ + for (cnt = 0; cnt < cur_salt->rounds; ++cnt) { + /* New context. */ + GOST34112012Init(&ctx, 512); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + else + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3 != 0) + GOST34112012Update(&ctx, s_bytes, cur_salt->len); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7 != 0) + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + GOST34112012Update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + GOST34112012Update(&ctx, p_bytes, saved_len[index]); + + /* Create intermediate result. */ + GOST34112012Final(&ctx, (unsigned char*)crypt_out[index]); + } + } + return count; +} + +static void set_salt(void *salt) +{ + cur_salt = salt; +} + +static void *get_salt(char *ciphertext) +{ + static struct saltstruct out; + int len; + + memset(&out, 0, sizeof(out)); + out.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + out.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(out.salt, ciphertext, len); + out.len = len; + return &out; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index], ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index], BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static unsigned int iteration_count(void *salt) +{ + struct saltstruct *csalt; + + csalt = salt; + return (unsigned int)csalt->rounds; +} + +// We are hashing the entire struct +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +struct fmt_main fmt_gost12512hash = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT | FMT_OMP, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + fmt_default_reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ diff --git a/src/gost94hash_fmt_plug.c b/src/gost94hash_fmt_plug.c new file mode 100644 index 0000000000..99628acce0 --- /dev/null +++ b/src/gost94hash_fmt_plug.c @@ -0,0 +1,443 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_gost94hash; +#elif FMT_REGISTERS_H +john_register_one(&fmt_gost94hash); +#else + +#define _GNU_SOURCE 1 +#include + +#ifdef _OPENMP +#include +#endif + +#include "arch.h" +#include "gost.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "johnswap.h" + +#ifndef OMP_SCALE +#define OMP_SCALE 1 // This and MKPC tuned for core i7 +#endif + +#define FORMAT_LABEL "gost94crypt" +#define FORMAT_NAME "Astra Linux $gost94hash$" +#define ALGORITHM_NAME "GOST R 34.11-94 32/" ARCH_BITS_STR + +#define PLAINTEXT_LENGTH 125 + +#define SALT_SIZE sizeof(struct saltstruct) + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define BLKS MIN_KEYS_PER_CRYPT + +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. */ +#define ROUNDS_MIN 1 /* Drepper has it as 1000 */ +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define CIPHERTEXT_LENGTH 43 + +#define BINARY_SIZE 32 +#define BINARY_ALIGN 4 +#define SALT_LENGTH 16 +#define SALT_ALIGN 4 +#define FORMAT_TAG "$gost94hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static int (*saved_len); +static char (*saved_key)[PLAINTEXT_LENGTH + 1]; +static uint32_t (*crypt_out)[BINARY_SIZE / sizeof(uint32_t)]; + +static struct saltstruct { + unsigned int len; + unsigned int rounds; + unsigned char salt[SALT_LENGTH]; +} *cur_salt; + +static struct fmt_tests tests[] = { + {"$gost94hash$salt$sG.6rfU0vKHX4eL00bUDqjXxaAcQHqpJQlM3ctfj013", "magnum"}, + {"$gost94hash$longersalt$KthJlkmGINf53PizXq8phMfeIC0deIsfafswsC3UN74", "password"}, + {"$gost94hash$longersalt$EtAWEGKZQGtZeHXaSDpQJP5tLhZnOi1NC2M/7PCmHZ6", "longerpassword"}, + {NULL} +}; + +/* ------- Check if the ciphertext if a valid gost94hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main * self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void * get_binary(char * ciphertext) { + static uint32_t outbuf[BINARY_SIZE/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i=0; + + do { + TO_BINARY(i, (i+10)%30, (i+20)%30); + i = (i+21)%30; + } while (i != 0); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12); + out[31] = value >> 8; + out[30] = value; + return (void *)out; +} + +static void init(struct fmt_main *self) +{ + omp_autotune(self, OMP_SCALE); + + saved_len = mem_calloc(self->params.max_keys_per_crypt, sizeof(*saved_len)); + saved_key = mem_calloc(self->params.max_keys_per_crypt, sizeof(*saved_key)); + crypt_out = mem_calloc(self->params.max_keys_per_crypt, sizeof(*crypt_out)); + + gost_init_table(); +} + +static void done(void) +{ + MEM_FREE(crypt_out); + MEM_FREE(saved_key); + MEM_FREE(saved_len); +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +static void set_key(char *key, int index) +{ + saved_len[index] = strnzcpyn(saved_key[index], key, sizeof(*saved_key)); +} + +static char *get_key(int index) +{ + saved_key[index][saved_len[index]] = 0; + return saved_key[index]; +} + +//#define john_gost_init john_gost_cryptopro_init + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + int index; + +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (index = 0; index < count; index++) { + // portably align temp_result char * pointer to 32 bits. + union xx { + unsigned char c[BINARY_SIZE]; + uint32_t a[BINARY_SIZE/sizeof(uint32_t)]; + } u; + unsigned char *temp_result = u.c; + gost_ctx ctx; + gost_ctx alt_ctx; + size_t cnt; + unsigned char *cp; + unsigned char p_bytes[PLAINTEXT_LENGTH + 1]; + unsigned char s_bytes[PLAINTEXT_LENGTH + 1]; + + /* Prepare for the real work. */ + john_gost_init(&ctx); + + /* Add the key string. */ + john_gost_update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* The last part is the salt string. This must be at most 16 + characters and it ends at the first `$' character (for + compatibility with existing implementations). */ + john_gost_update(&ctx, cur_salt->salt, cur_salt->len); + + + /* Compute alternate GOST sum with input KEY, SALT, and KEY. The + final result will be added to the first context. */ + john_gost_init(&alt_ctx); + + /* Add key. */ + john_gost_update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Add salt. */ + john_gost_update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Add key again. */ + john_gost_update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Now get result of this (32 bytes) and add it to the other context. */ + john_gost_final(&alt_ctx, (unsigned char*)crypt_out[index]); + + /* Add for any character in the key one byte of the alternate sum. */ + for (cnt = saved_len[index]; cnt > BINARY_SIZE; cnt -= BINARY_SIZE) + john_gost_update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + john_gost_update(&ctx, (unsigned char*)crypt_out[index], cnt); + + /* Take the binary representation of the length of the key and for every + 1 add the alternate sum, for every 0 the key. */ + for (cnt = saved_len[index]; cnt > 0; cnt >>= 1) + if ((cnt & 1) != 0) + john_gost_update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + john_gost_update(&ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Create intermediate result. */ + john_gost_final(&ctx, (unsigned char*)crypt_out[index]); + + /* Start computation of P byte sequence. */ + john_gost_init(&alt_ctx); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < saved_len[index]; ++cnt) + john_gost_update(&alt_ctx, (unsigned char*)saved_key[index], saved_len[index]); + + /* Finish the digest. */ + john_gost_final(&alt_ctx, temp_result); + + /* Create byte sequence P. */ + cp = p_bytes; + for (cnt = saved_len[index]; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = (unsigned char*)memcpy(cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy(cp, temp_result, cnt); + + /* Start computation of S byte sequence. */ + john_gost_init(&alt_ctx); + + /* For every character in the password add the entire password. */ + for (cnt = 0; cnt < 16 + ((unsigned char*)crypt_out[index])[0]; ++cnt) + john_gost_update(&alt_ctx, cur_salt->salt, cur_salt->len); + + /* Finish the digest. */ + john_gost_final(&alt_ctx, temp_result); + + /* Create byte sequence S. */ + cp = s_bytes; + for (cnt = cur_salt->len; cnt >= BINARY_SIZE; cnt -= BINARY_SIZE) + cp = (unsigned char*)memcpy(cp, temp_result, BINARY_SIZE) + BINARY_SIZE; + memcpy(cp, temp_result, cnt); + + /* Repeatedly run the collected hash value through GOST to burn CPU cycles. */ + for (cnt = 0; cnt < cur_salt->rounds; ++cnt) { + /* New context. */ + john_gost_init(&ctx); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + john_gost_update(&ctx, p_bytes, saved_len[index]); + else + john_gost_update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + + /* Add salt for numbers not divisible by 3. */ + if (cnt % 3 != 0) + john_gost_update(&ctx, s_bytes, cur_salt->len); + + /* Add key for numbers not divisible by 7. */ + if (cnt % 7 != 0) + john_gost_update(&ctx, p_bytes, saved_len[index]); + + /* Add key or last result. */ + if ((cnt & 1) != 0) + john_gost_update(&ctx, (unsigned char*)crypt_out[index], BINARY_SIZE); + else + john_gost_update(&ctx, p_bytes, saved_len[index]); + + /* Create intermediate result. */ + john_gost_final(&ctx, (unsigned char*)crypt_out[index]); + } + } + + return count; +} + +static void set_salt(void *salt) +{ + cur_salt = salt; +} + +static void *get_salt(char *ciphertext) +{ + static struct saltstruct out; + int len; + + memset(&out, 0, sizeof(out)); + out.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + out.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(out.salt, ciphertext, len); + out.len = len; + return &out; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index], ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index], BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static unsigned int iteration_count(void *salt) +{ + struct saltstruct *gost94hash_salt; + + gost94hash_salt = salt; + return (unsigned int)gost94hash_salt->rounds; +} + +// We are hashing the entire struct +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +struct fmt_main fmt_gost94hash = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT | FMT_OMP, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + fmt_default_reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ diff --git a/src/opencl_common.c b/src/opencl_common.c index 3065c5fcb0..c46e291f79 100644 --- a/src/opencl_common.c +++ b/src/opencl_common.c @@ -6,7 +6,7 @@ * This software is * Copyright (c) 2010-2012 Samuele Giovanni Tonon * Copyright (c) 2010-2013 Lukas Odzioba - * Copyright (c) 2010-2019 magnum + * Copyright (c) 2010-2022 magnum * Copyright (c) 2012-2015 Claudio André * * and is hereby released to the general public under the following terms: @@ -1279,6 +1279,8 @@ void opencl_build(int sequential_id, const char *opts, int save, const char *fil uint64_t end = john_get_nano(); log_event("- build time: %ss", ns2string(end - start)); + if (options.verbosity >= VERB_MAX) + fprintf(stderr, "Build time: %ss\n", ns2string(end - start)); // Report build errors and warnings if (build_code != CL_SUCCESS) { @@ -1394,6 +1396,8 @@ cl_int opencl_build_from_binary(int sequential_id, cl_program *program, const ch fprintf(stderr, "Binary Build log: %s\n", build_log); log_event("- build time: %ss", ns2string(end - start)); + if (options.verbosity >= VERB_MAX) + fprintf(stderr, "Build time: %ss\n", ns2string(end - start)); MEM_FREE(build_log); return CL_SUCCESS; } diff --git a/src/opencl_gost12256hash_fmt_plug.c b/src/opencl_gost12256hash_fmt_plug.c new file mode 100644 index 0000000000..a0ea2071b5 --- /dev/null +++ b/src/opencl_gost12256hash_fmt_plug.c @@ -0,0 +1,492 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#ifdef HAVE_OPENCL + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_opencl_cryptgost12256; +#elif FMT_REGISTERS_H +john_register_one(&fmt_opencl_cryptgost12256); +#else + +#include + +#include "arch.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "options.h" +#include "opencl_common.h" + +#define FORMAT_LABEL "streebog256crypt-opencl" +#define FORMAT_NAME "Astra Linux $gost12256hash$" +#define ALGORITHM_NAME "GOST R 34.11-2012 OpenCL" + +#if __APPLE__ +#define PLAINTEXT_LENGTH 32 /* Larger than this will segfault - driver bug? */ +#else +#define PLAINTEXT_LENGTH 60 /* This size makes for some optimizations and alignment */ +#endif + +#define BINARY_SIZE (256/8) // 32 +#define BINARY_ALIGN 4 +#define SALT_SIZE sizeof(saltstruct) +#define SALT_ALIGN 4 + +#define SALT_LENGTH 16 + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define CIPHERTEXT_LENGTH 43 + +/* ------ Contains (at least) prepare(), valid() and split() ------ */ +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. Libs usually have it as 1000 but we accept any */ +#define ROUNDS_MIN 1 +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define FORMAT_TAG "$gost12256hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static struct fmt_tests tests[] = { + {"$gost12256hash$password$awrQfwgXMa0BFMCtZu97GJKqeVszI/B2usmTf9cpOa/", "magnum"}, + {"$gost12256hash$longersalt$KP7Eyt1XM83PbW3jtvOAtsQUQUf0EKZBP0UqFds7AU7", "longerpassword"}, + {"$gost12256hash$eVszI/B2usmT$gGPHrK8MAsv/KLAcLhSXZES5OdI9dMFQONmIpUDNzi5", "password"}, + {NULL} +}; + +typedef struct { + unsigned int len; + char key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + unsigned int v[BINARY_SIZE / sizeof(unsigned int)]; +} outbuf; +static outbuf *crypt_out; + +typedef struct { + unsigned int rounds; + unsigned int len; + unsigned char salt[SALT_LENGTH]; +} saltstruct; +static saltstruct cur_salt; + +typedef struct { + unsigned char p_bytes[PLAINTEXT_LENGTH]; + unsigned char s_bytes[SALT_LENGTH]; +} statebuf; + +typedef struct { + uint64_t Ax[8][256]; +} localbuf; + +#define STEP 0 +#define SEED 128 +#define HASH_LOOPS (42 * 4) /* Kernel is hardcoded for multiple of 42 for optimizations */ +#define LOOP_CALLS (5000 + (HASH_LOOPS - 1) / HASH_LOOPS) +#define ITERATIONS 5004 + +static inbuf *inbuffer; +static cl_int cl_error; +static cl_mem mem_in, mem_out, mem_salt, mem_state; +static cl_kernel init_kernel, final_kernel; +static int new_keys; +static struct fmt_main *self; + +static const char *warn[] = { + "key xfer: ", ", init: ", ", loop: ", ", final: ", ", result xfer: " +}; + +static int split_events[] = { 2, -1, -1 }; + +// This file contains auto-tuning routine(s). Has to be included after formats definitions. +#include "opencl_autotune.h" + +static void release_clobj(void); + +#define CL_RO CL_MEM_READ_ONLY +#define CL_WO CL_MEM_WRITE_ONLY +#define CL_RW CL_MEM_READ_WRITE + +#define CLCREATEBUFFER(_flags, _size) \ + clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error); \ + HANDLE_CLERROR(cl_error, "Error allocating GPU memory"); + +#define CLKERNELARG(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), \ + "Error setting kernel argument"); + +#define CLKRNARGLOC(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), NULL), \ + "Error setting kernel argument"); + +static void create_clobj(size_t gws, struct fmt_main *self) +{ + release_clobj(); + + inbuffer = mem_calloc(gws, sizeof(inbuf)); + crypt_out = mem_calloc(gws, sizeof(outbuf)); + + mem_in = CLCREATEBUFFER(CL_RO, gws * sizeof(inbuf)); + mem_out = CLCREATEBUFFER(CL_WO, gws * sizeof(outbuf)); + mem_state = CLCREATEBUFFER(CL_RW, gws * sizeof(statebuf)); + mem_salt = CLCREATEBUFFER(CL_RO, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_TRUE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, NULL), "Copy data to gpu"); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_TRUE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + + CLKERNELARG(init_kernel, 0, mem_in); + CLKERNELARG(init_kernel, 1, mem_salt); + CLKERNELARG(init_kernel, 2, mem_state); + CLKRNARGLOC(init_kernel, 3, localbuf); + CLKERNELARG(init_kernel, 4, mem_out); + + CLKERNELARG(crypt_kernel, 0, mem_in); + CLKERNELARG(crypt_kernel, 1, mem_salt); + CLKERNELARG(crypt_kernel, 2, mem_state); + CLKRNARGLOC(crypt_kernel, 3, localbuf); + CLKERNELARG(crypt_kernel, 4, mem_out); + + CLKERNELARG(final_kernel, 0, mem_in); + CLKERNELARG(final_kernel, 1, mem_salt); + CLKERNELARG(final_kernel, 2, mem_state); + CLKRNARGLOC(final_kernel, 3, localbuf); + CLKERNELARG(final_kernel, 4, mem_out); + + global_work_size = gws; +} + +/* ------- Helper functions ------- */ +static size_t get_task_max_work_group_size() +{ + size_t s = autotune_get_task_max_work_group_size(FALSE, 0, init_kernel); + s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel)); + return MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel)); +} + +static void release_clobj(void) +{ + if (crypt_out) { + HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in"); + HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt"); + HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state"); + HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out"); + + MEM_FREE(inbuffer); + MEM_FREE(crypt_out); + } +} + +static void init(struct fmt_main *_self) +{ + self = _self; + + opencl_prepare_dev(gpu_id); +} + +static void reset(struct db_main *db) +{ + if (!program[gpu_id]) { + char build_opts[64]; + + snprintf(build_opts, sizeof(build_opts), "-DPLAINTEXT_LENGTH=%u -DHASH_LOOPS=%u", + PLAINTEXT_LENGTH, HASH_LOOPS); + opencl_init("$JOHN/opencl/gost12256hash_kernel.cl", gpu_id, build_opts); + + init_kernel = clCreateKernel(program[gpu_id], "gost12256init", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + crypt_kernel = clCreateKernel(program[gpu_id], "gost12256loop", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + final_kernel = clCreateKernel(program[gpu_id], "gost12256final", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + } + + // Initialize openCL tuning (library) for this format. + opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn, + 2, self, create_clobj, release_clobj, + sizeof(mem_state), 0, db); + + // Auto tune execution from shared/included code. + autotune_run(self, LOOP_CALLS, 0, 200); +} + +static void done(void) +{ + if (program[gpu_id]) { + release_clobj(); + + HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(init_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program"); + + program[gpu_id] = NULL; + } +} + +/* ------- Check if the ciphertext if a valid gost12256hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main * self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext || pos > &ciphertext[SALT_LENGTH]) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void * get_binary(char * ciphertext) { + static uint32_t outbuf[BINARY_SIZE/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i=0; + + do { + TO_BINARY(i, (i+10)%30, (i+20)%30); + i = (i+21)%30; + } while (i != 0); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12); + out[31] = value >> 8; + out[30] = value; + return (void *)out; +} + +static void set_salt(void *salt) +{ + memcpy(&cur_salt, salt, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_FALSE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + HANDLE_CLERROR(clFlush(queue[gpu_id]), "clFlush failed in set_salt()"); +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + size_t *lws = local_work_size ? &local_work_size : NULL; + size_t gws = GET_KPC_MULTIPLE(count, local_work_size); + int index; + + // Copy data to gpu + if (new_keys) { + BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); + + new_keys = 0; + } + + // Run kernel + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], init_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel"); + + uint loops = (ocl_autotune_running ? 1 : cur_salt.rounds / HASH_LOOPS); + for (index = 0; index < loops; index++) { + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel"); + BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); + opencl_process_event(); + } + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[3]), "failed in clEnqueueNDRangeKernel"); + + // Read the result back + BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, gws * sizeof(outbuf), crypt_out, 0, NULL, multi_profilingEvent[4]), "Copy result back"); + + return count; +} + +static void *get_salt(char *ciphertext) +{ + int len; + + memset(&cur_salt, 0, sizeof(cur_salt)); + cur_salt.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + cur_salt.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(cur_salt.salt, ciphertext, len); + cur_salt.len = len; + return &cur_salt; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index].v, ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index].v, BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static void set_key(char *key, int index) +{ + inbuffer[index].len = strlen(key); + + memcpy(inbuffer[index].key, key, inbuffer[index].len); + + new_keys = 1; +} + +static char* get_key(int index) +{ + static char out[PLAINTEXT_LENGTH + 1]; + + memcpy(out, inbuffer[index].key, inbuffer[index].len); + out[inbuffer[index].len] = 0; + + return out; +} + +// Public domain hash function by DJ Bernstein +// We are hashing the entire struct, so rounds get included +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +static unsigned int iteration_count(void *salt) +{ + saltstruct *p = salt; + return p->rounds; +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +struct fmt_main fmt_opencl_cryptgost12256 = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ +#endif /* HAVE_OPENCL */ diff --git a/src/opencl_gost12512hash_fmt_plug.c b/src/opencl_gost12512hash_fmt_plug.c new file mode 100644 index 0000000000..8e92e10f56 --- /dev/null +++ b/src/opencl_gost12512hash_fmt_plug.c @@ -0,0 +1,491 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#ifdef HAVE_OPENCL + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_opencl_cryptgost12512; +#elif FMT_REGISTERS_H +john_register_one(&fmt_opencl_cryptgost12512); +#else + +#include + +#include "arch.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "options.h" +#include "opencl_common.h" + +#define FORMAT_LABEL "streebog512crypt-opencl" +#define FORMAT_NAME "Astra Linux $gost12512hash$" +#define ALGORITHM_NAME "GOST R 34.11-2012 OpenCL" + +#if __APPLE__ +#define PLAINTEXT_LENGTH 32 /* Larger than this will segfault - driver bug? */ +#else +#define PLAINTEXT_LENGTH 60 /* This size makes for some optimizations and alignment */ +#endif + +#define BINARY_SIZE (512/8) // 64 +#define BINARY_ALIGN 4 +#define SALT_SIZE sizeof(saltstruct) +#define SALT_ALIGN 4 + +#define SALT_LENGTH 16 + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define CIPHERTEXT_LENGTH 86 + +/* ------ Contains (at least) prepare(), valid() and split() ------ */ +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. Libs usually have it as 1000 but we accept any */ +#define ROUNDS_MIN 1 +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define FORMAT_TAG "$gost12512hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static struct fmt_tests tests[] = { + {"$gost12512hash$salt$nwv1Oqm2zL9523APlKI6Fpmlm56UzSe.J1wDzMk61S2zxa1nWY4U9d3oP5UyOpZQvTzVPEZWRnfRd/yJZeXp2.", "magnum"}, + {"$gost12512hash$longersalt$Ru..ARXlil3CeBqecyE9EBW1tfZZoPPnnwWJKy7nbhOnzmXpldy3nt1iB1QM21KD73L1tm/zWsiyU05zHluwO1", "longerpassword"}, + {"$gost12512hash$KrmcHbeQmxiP/Ipl$uNmEQvpJ7B0jTcCkIh1IQEIRoyAXzliyt2S7VAz/xA8GNjBbFeE.c98elVBwad6y40qPNKYqYNkWZJFxpGw620", "password"}, + {NULL} +}; + +typedef struct { + unsigned int len; + char key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + unsigned int v[BINARY_SIZE / sizeof(unsigned int)]; +} outbuf; +static outbuf *crypt_out; + +typedef struct { + unsigned int rounds; + unsigned int len; + unsigned char salt[SALT_LENGTH]; +} saltstruct; +static saltstruct cur_salt; + +typedef struct { + unsigned char p_bytes[PLAINTEXT_LENGTH]; + unsigned char s_bytes[SALT_LENGTH]; +} statebuf; + +typedef struct { + uint64_t Ax[8][256]; +} localbuf; + +#define STEP 0 +#define SEED 128 +#define HASH_LOOPS (42 * 4) /* Kernel is hardcoded for multiple of 42 for optimizations */ +#define LOOP_CALLS (5000 + (HASH_LOOPS - 1) / HASH_LOOPS) +#define ITERATIONS 5004 + +static inbuf *inbuffer; +static cl_int cl_error; +static cl_mem mem_in, mem_out, mem_salt, mem_state; +static cl_kernel init_kernel, final_kernel; +static int new_keys; +static struct fmt_main *self; + +static const char * warn[] = { + "key xfer: ", ", init: ", ", loop: ", ", final: ", ", result xfer: " +}; + +static int split_events[] = { 2, -1, -1 }; + +// This file contains auto-tuning routine(s). Has to be included after formats definitions. +#include "opencl_autotune.h" + +static void release_clobj(void); + +#define CL_RO CL_MEM_READ_ONLY +#define CL_WO CL_MEM_WRITE_ONLY +#define CL_RW CL_MEM_READ_WRITE + +#define CLCREATEBUFFER(_flags, _size) \ + clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error); \ + HANDLE_CLERROR(cl_error, "Error allocating GPU memory"); + +#define CLKERNELARG(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), \ + "Error setting kernel argument"); + +#define CLKRNARGLOC(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), NULL), \ + "Error setting kernel argument"); + +static void create_clobj(size_t gws, struct fmt_main *self) +{ + release_clobj(); + + inbuffer = mem_calloc(gws, sizeof(inbuf)); + crypt_out = mem_calloc(gws, sizeof(outbuf)); + + mem_in = CLCREATEBUFFER(CL_RO, gws * sizeof(inbuf)); + mem_out = CLCREATEBUFFER(CL_WO, gws * sizeof(outbuf)); + mem_state = CLCREATEBUFFER(CL_RW, gws * sizeof(statebuf)); + mem_salt = CLCREATEBUFFER(CL_RO, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_TRUE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, NULL), "Copy data to gpu"); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_TRUE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + + CLKERNELARG(init_kernel, 0, mem_in); + CLKERNELARG(init_kernel, 1, mem_salt); + CLKERNELARG(init_kernel, 2, mem_state); + CLKRNARGLOC(init_kernel, 3, localbuf); + CLKERNELARG(init_kernel, 4, mem_out); + + CLKERNELARG(crypt_kernel, 0, mem_in); + CLKERNELARG(crypt_kernel, 1, mem_salt); + CLKERNELARG(crypt_kernel, 2, mem_state); + CLKRNARGLOC(crypt_kernel, 3, localbuf); + CLKERNELARG(crypt_kernel, 4, mem_out); + + CLKERNELARG(final_kernel, 0, mem_in); + CLKERNELARG(final_kernel, 1, mem_salt); + CLKERNELARG(final_kernel, 2, mem_state); + CLKRNARGLOC(final_kernel, 3, localbuf); + CLKERNELARG(final_kernel, 4, mem_out); + + global_work_size = gws; +} + +/* ------- Helper functions ------- */ +static size_t get_task_max_work_group_size() +{ + size_t s = autotune_get_task_max_work_group_size(FALSE, 0, init_kernel); + s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel)); + return MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel)); +} + +static void release_clobj(void) +{ + if (crypt_out) { + HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in"); + HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt"); + HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state"); + HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out"); + + MEM_FREE(inbuffer); + MEM_FREE(crypt_out); + } +} + +static void init(struct fmt_main *_self) +{ + self = _self; + + opencl_prepare_dev(gpu_id); +} + +static void reset(struct db_main *db) +{ + if (!program[gpu_id]) { + char build_opts[64]; + + snprintf(build_opts, sizeof(build_opts), "-DPLAINTEXT_LENGTH=%u -DHASH_LOOPS=%u", + PLAINTEXT_LENGTH, HASH_LOOPS); + opencl_init("$JOHN/opencl/gost12512hash_kernel.cl", gpu_id, build_opts); + + init_kernel = clCreateKernel(program[gpu_id], "gost12512init", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + crypt_kernel = clCreateKernel(program[gpu_id], "gost12512loop", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + final_kernel = clCreateKernel(program[gpu_id], "gost12512final", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + } + + // Initialize openCL tuning (library) for this format. + opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn, + 2, self, create_clobj, release_clobj, + sizeof(mem_state), 0, db); + + // Auto tune execution from shared/included code. + autotune_run(self, LOOP_CALLS, 0, 200); +} + +static void done(void) +{ + if (program[gpu_id]) { + release_clobj(); + + HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(init_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program"); + + program[gpu_id] = NULL; + } +} + +/* ------- Check if the ciphertext if a valid gost12512hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main *self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext || pos > &ciphertext[SALT_LENGTH]) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void *get_binary(char * ciphertext) { + static uint32_t outbuf[64/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i = 0; + + do { + TO_BINARY(i, (i+21)%63, (i+42)%63); + i = (i+22)%63; + } while (i != 21); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6); + out[63] = value; + return (void *)out; +} + +static void set_salt(void *salt) +{ + memcpy(&cur_salt, salt, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_FALSE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + HANDLE_CLERROR(clFlush(queue[gpu_id]), "clFlush failed in set_salt()"); +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + size_t *lws = local_work_size ? &local_work_size : NULL; + size_t gws = GET_KPC_MULTIPLE(count, local_work_size); + int index; + + // Copy data to gpu + if (new_keys) { + BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); + + new_keys = 0; + } + + // Run kernel + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], init_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel"); + + uint loops = (ocl_autotune_running ? 1 : cur_salt.rounds / HASH_LOOPS); + for (index = 0; index < loops; index++) { + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel"); + BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); + opencl_process_event(); + } + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[3]), "failed in clEnqueueNDRangeKernel"); + + // Read the result back + BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, gws * sizeof(outbuf), crypt_out, 0, NULL, multi_profilingEvent[4]), "Copy result back"); + + return count; +} + +static void *get_salt(char *ciphertext) +{ + int len; + + memset(&cur_salt, 0, sizeof(cur_salt)); + cur_salt.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + cur_salt.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(cur_salt.salt, ciphertext, len); + cur_salt.len = len; + return &cur_salt; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index].v, ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index].v, BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static void set_key(char *key, int index) +{ + inbuffer[index].len = strlen(key); + + memcpy(inbuffer[index].key, key, inbuffer[index].len); + + new_keys = 1; +} + +static char* get_key(int index) +{ + static char out[PLAINTEXT_LENGTH + 1]; + + memcpy(out, inbuffer[index].key, inbuffer[index].len); + out[inbuffer[index].len] = 0; + + return out; +} + +// Public domain hash function by DJ Bernstein +// We are hashing the entire struct, so rounds get included +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +static unsigned int iteration_count(void *salt) +{ + saltstruct *p = salt; + return p->rounds; +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +struct fmt_main fmt_opencl_cryptgost12512 = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ +#endif /* HAVE_OPENCL */ diff --git a/src/opencl_gost94hash_fmt_plug.c b/src/opencl_gost94hash_fmt_plug.c new file mode 100644 index 0000000000..83b826e907 --- /dev/null +++ b/src/opencl_gost94hash_fmt_plug.c @@ -0,0 +1,488 @@ +/* + * This software is Copyright (c) 2022 magnum, + * and it is hereby released to the general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + */ + +#ifdef HAVE_OPENCL + +#if FMT_EXTERNS_H +extern struct fmt_main fmt_opencl_cryptgost94; +#elif FMT_REGISTERS_H +john_register_one(&fmt_opencl_cryptgost94); +#else + +#include + +#include "arch.h" +#include "params.h" +#include "common.h" +#include "formats.h" +#include "options.h" +#include "opencl_common.h" + +#define FORMAT_LABEL "gost94crypt-opencl" +#define FORMAT_NAME "Astra Linux $gost94hash$" +#define ALGORITHM_NAME "GOST R 34.11-94 OpenCL" + +#define PLAINTEXT_LENGTH 60 + +#define BINARY_SIZE (256/8) // 32 +#define BINARY_ALIGN 4 +#define SALT_SIZE sizeof(saltstruct) +#define SALT_ALIGN 4 + +#define SALT_LENGTH 16 + +#define MIN_KEYS_PER_CRYPT 1 +#define MAX_KEYS_PER_CRYPT 1 + +#define CIPHERTEXT_LENGTH 43 + +/* ------ Contains (at least) prepare(), valid() and split() ------ */ +/* Prefix for optional rounds specification. */ +#define ROUNDS_PREFIX "rounds=" +/* Default number of rounds if not explicitly specified. */ +#define ROUNDS_DEFAULT 5000 +/* Minimum number of rounds. Libs usually have it as 1000 but we accept any */ +#define ROUNDS_MIN 1 +/* Maximum number of rounds. */ +#define ROUNDS_MAX 999999999 + +#define BENCHMARK_COMMENT " (rounds=5000)" +#define BENCHMARK_LENGTH 0x107 +#define FORMAT_TAG "$gost94hash$" +#define FORMAT_TAG_LEN (sizeof(FORMAT_TAG)-1) + +static struct fmt_tests tests[] = { + {"$gost94hash$salt$sG.6rfU0vKHX4eL00bUDqjXxaAcQHqpJQlM3ctfj013", "magnum"}, + {"$gost94hash$longersalt$KthJlkmGINf53PizXq8phMfeIC0deIsfafswsC3UN74", "password"}, + {"$gost94hash$longersalt$EtAWEGKZQGtZeHXaSDpQJP5tLhZnOi1NC2M/7PCmHZ6", "longerpassword"}, + {NULL} +}; + +typedef struct { + unsigned int len; + char key[PLAINTEXT_LENGTH]; +} inbuf; + +typedef struct { + unsigned int v[BINARY_SIZE / sizeof(unsigned int)]; +} outbuf; +static outbuf *crypt_out; + +typedef struct { + unsigned int rounds; + unsigned int len; + unsigned char salt[SALT_LENGTH]; +} saltstruct; +static saltstruct cur_salt; + +typedef struct { + unsigned char p_bytes[PLAINTEXT_LENGTH]; + unsigned char s_bytes[SALT_LENGTH]; +} statebuf; + +typedef struct { + unsigned int sbox[4][256]; +} localbuf; + +#define STEP 0 +#define SEED 128 +#define HASH_LOOPS (42 * 4) /* Kernel is hardcoded for multiple of 42 for optimizations */ +#define LOOP_CALLS (5000 + (HASH_LOOPS - 1) / HASH_LOOPS) +#define ITERATIONS 5004 + +static inbuf *inbuffer; +static cl_int cl_error; +static cl_mem mem_in, mem_out, mem_salt, mem_state; +static cl_kernel init_kernel, final_kernel; +static int new_keys; +static struct fmt_main *self; + +static const char *warn[] = { + "key xfer: ", ", init: ", ", loop: ", ", final: ", ", result xfer: " +}; + +static int split_events[] = { 2, -1, -1 }; + +// This file contains auto-tuning routine(s). Has to be included after formats definitions. +#include "opencl_autotune.h" + +static void release_clobj(void); + +#define CL_RO CL_MEM_READ_ONLY +#define CL_WO CL_MEM_WRITE_ONLY +#define CL_RW CL_MEM_READ_WRITE + +#define CLCREATEBUFFER(_flags, _size) \ + clCreateBuffer(context[gpu_id], _flags, _size, NULL, &cl_error); \ + HANDLE_CLERROR(cl_error, "Error allocating GPU memory"); + +#define CLKERNELARG(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), \ + "Error setting kernel argument"); + +#define CLKRNARGLOC(kernel, id, arg) \ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), NULL), \ + "Error setting kernel argument"); + +static void create_clobj(size_t gws, struct fmt_main *self) +{ + release_clobj(); + + inbuffer = mem_calloc(gws, sizeof(inbuf)); + crypt_out = mem_calloc(gws, sizeof(outbuf)); + + mem_in = CLCREATEBUFFER(CL_RO, gws * sizeof(inbuf)); + mem_out = CLCREATEBUFFER(CL_WO, gws * sizeof(outbuf)); + mem_state = CLCREATEBUFFER(CL_RW, gws * sizeof(statebuf)); + mem_salt = CLCREATEBUFFER(CL_RO, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_TRUE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, NULL), "Copy data to gpu"); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_TRUE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + + CLKERNELARG(init_kernel, 0, mem_in); + CLKERNELARG(init_kernel, 1, mem_salt); + CLKERNELARG(init_kernel, 2, mem_state); + CLKRNARGLOC(init_kernel, 3, localbuf); + CLKERNELARG(init_kernel, 4, mem_out); + + CLKERNELARG(crypt_kernel, 0, mem_in); + CLKERNELARG(crypt_kernel, 1, mem_salt); + CLKERNELARG(crypt_kernel, 2, mem_state); + CLKRNARGLOC(crypt_kernel, 3, localbuf); + CLKERNELARG(crypt_kernel, 4, mem_out); + + CLKERNELARG(final_kernel, 0, mem_in); + CLKERNELARG(final_kernel, 1, mem_salt); + CLKERNELARG(final_kernel, 2, mem_state); + CLKRNARGLOC(final_kernel, 3, localbuf); + CLKERNELARG(final_kernel, 4, mem_out); + + global_work_size = gws; +} + +/* ------- Helper functions ------- */ +static size_t get_task_max_work_group_size() +{ + size_t s = autotune_get_task_max_work_group_size(FALSE, 0, init_kernel); + s = MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, crypt_kernel)); + return MIN(s, autotune_get_task_max_work_group_size(FALSE, 0, final_kernel)); +} + +static void release_clobj(void) +{ + if (crypt_out) { + HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in"); + HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt"); + HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state"); + HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out"); + + MEM_FREE(inbuffer); + MEM_FREE(crypt_out); + } +} + +static void init(struct fmt_main *_self) +{ + self = _self; + + opencl_prepare_dev(gpu_id); +} + +static void reset(struct db_main *db) +{ + if (!program[gpu_id]) { + char build_opts[64]; + + snprintf(build_opts, sizeof(build_opts), "-DPLAINTEXT_LENGTH=%u -DHASH_LOOPS=%u", + PLAINTEXT_LENGTH, HASH_LOOPS); + opencl_init("$JOHN/opencl/gost94hash_kernel.cl", gpu_id, build_opts); + + init_kernel = clCreateKernel(program[gpu_id], "gost94init", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + crypt_kernel = clCreateKernel(program[gpu_id], "gost94loop", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + + final_kernel = clCreateKernel(program[gpu_id], "gost94final", &ret_code); + HANDLE_CLERROR(ret_code, "Error creating kernel"); + } + + // Initialize openCL tuning (library) for this format. + opencl_init_auto_setup(SEED, HASH_LOOPS, split_events, warn, + 2, self, create_clobj, release_clobj, + sizeof(mem_state), 0, db); + + // Auto tune execution from shared/included code. + autotune_run(self, LOOP_CALLS, 0, 200); +} + +static void done(void) +{ + if (program[gpu_id]) { + release_clobj(); + + HANDLE_CLERROR(clReleaseKernel(final_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(init_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseProgram(program[gpu_id]), "Release Program"); + + program[gpu_id] = NULL; + } +} + +/* ------- Check if the ciphertext if a valid gost94hash crypt ------- */ +static int valid(char * ciphertext, struct fmt_main * self) { + char *pos, *start; + + if (strncmp(ciphertext, FORMAT_TAG, FORMAT_TAG_LEN)) + return 0; + + ciphertext += FORMAT_TAG_LEN; + + if (!strncmp(ciphertext, ROUNDS_PREFIX, sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + if (!strtoul(num, &endp, 10)) + return 0; + if (*endp == '$') + ciphertext = endp + 1; + } + for (pos = ciphertext; *pos && *pos != '$'; pos++); + if (!*pos || pos < ciphertext || pos > &ciphertext[SALT_LENGTH]) return 0; + + start = ++pos; + while (atoi64[ARCH_INDEX(*pos)] != 0x7F) pos++; + if (*pos || pos - start != CIPHERTEXT_LENGTH) return 0; + return 1; +} + +/* ------- To binary functions ------- */ +#define TO_BINARY(b1, b2, b3) \ + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12) | \ + ((uint32_t)atoi64[ARCH_INDEX(pos[3])] << 18); \ + pos += 4; \ + out[b1] = value >> 16; \ + out[b2] = value >> 8; \ + out[b3] = value; + +static void * get_binary(char * ciphertext) { + static uint32_t outbuf[BINARY_SIZE/4]; + uint32_t value; + char *pos = strrchr(ciphertext, '$') + 1; + unsigned char *out = (unsigned char*)outbuf; + int i=0; + + do { + TO_BINARY(i, (i+10)%30, (i+20)%30); + i = (i+21)%30; + } while (i != 0); + value = (uint32_t)atoi64[ARCH_INDEX(pos[0])] | + ((uint32_t)atoi64[ARCH_INDEX(pos[1])] << 6) | + ((uint32_t)atoi64[ARCH_INDEX(pos[2])] << 12); + out[31] = value >> 8; + out[30] = value; + return (void *)out; +} + +static void set_salt(void *salt) +{ + memcpy(&cur_salt, salt, sizeof(saltstruct)); + + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_salt, CL_FALSE, 0, sizeof(saltstruct), &cur_salt, 0, NULL, NULL), "Salt transfer"); + HANDLE_CLERROR(clFlush(queue[gpu_id]), "clFlush failed in set_salt()"); +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + const int count = *pcount; + size_t *lws = local_work_size ? &local_work_size : NULL; + size_t gws = GET_KPC_MULTIPLE(count, local_work_size); + int index; + + // Copy data to gpu + if (new_keys) { + BENCH_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], mem_in, CL_FALSE, 0, gws * sizeof(inbuf), inbuffer, 0, NULL, multi_profilingEvent[0]), "Copy data to gpu"); + + new_keys = 0; + } + + // Run kernel + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], init_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[1]), "Run initial kernel"); + + uint loops = (ocl_autotune_running ? 1 : cur_salt.rounds / HASH_LOOPS); + for (index = 0; index < loops; index++) { + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], crypt_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[2]), "failed in clEnqueueNDRangeKernel"); + BENCH_CLERROR(clFinish(queue[gpu_id]), "Error running loop kernel"); + opencl_process_event(); + } + BENCH_CLERROR(clEnqueueNDRangeKernel(queue[gpu_id], final_kernel, 1, NULL, &gws, lws, 0, NULL, multi_profilingEvent[3]), "failed in clEnqueueNDRangeKernel"); + + // Read the result back + BENCH_CLERROR(clEnqueueReadBuffer(queue[gpu_id], mem_out, CL_TRUE, 0, gws * sizeof(outbuf), crypt_out, 0, NULL, multi_profilingEvent[4]), "Copy result back"); + + return count; +} + +static void *get_salt(char *ciphertext) +{ + int len; + + memset(&cur_salt, 0, sizeof(cur_salt)); + cur_salt.rounds = ROUNDS_DEFAULT; + ciphertext += FORMAT_TAG_LEN; + if (!strncmp(ciphertext, ROUNDS_PREFIX, + sizeof(ROUNDS_PREFIX) - 1)) { + const char *num = ciphertext + sizeof(ROUNDS_PREFIX) - 1; + char *endp; + unsigned long int srounds = strtoul(num, &endp, 10); + if (*endp == '$') + { + ciphertext = endp + 1; + srounds = srounds < ROUNDS_MIN ? + ROUNDS_MIN : srounds; + cur_salt.rounds = srounds > ROUNDS_MAX ? + ROUNDS_MAX : srounds; + } + } + + for (len = 0; ciphertext[len] != '$'; len++); + + if (len > SALT_LENGTH) + len = SALT_LENGTH; + + memcpy(cur_salt.salt, ciphertext, len); + cur_salt.len = len; + return &cur_salt; +} + +static int cmp_all(void *binary, int count) +{ + int index; + + for (index = 0; index < count; index++) + if (!memcmp(binary, crypt_out[index].v, ARCH_SIZE)) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + return !memcmp(binary, crypt_out[index].v, BINARY_SIZE); +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static void set_key(char *key, int index) +{ + inbuffer[index].len = strlen(key); + + memcpy(inbuffer[index].key, key, inbuffer[index].len); + + new_keys = 1; +} + +static char* get_key(int index) +{ + static char out[PLAINTEXT_LENGTH + 1]; + + memcpy(out, inbuffer[index].key, inbuffer[index].len); + out[inbuffer[index].len] = 0; + + return out; +} + +// Public domain hash function by DJ Bernstein +// We are hashing the entire struct, so rounds get included +static int salt_hash(void *salt) +{ + unsigned char *s = salt; + unsigned int hash = 5381; + unsigned int i; + + for (i = 0; i < SALT_SIZE; i++) + hash = ((hash << 5) + hash) ^ s[i]; + + return hash & (SALT_HASH_SIZE - 1); +} + +static unsigned int iteration_count(void *salt) +{ + saltstruct *p = salt; + return p->rounds; +} + +#define COMMON_GET_HASH_VAR crypt_out +#include "common-get-hash.h" + +struct fmt_main fmt_opencl_cryptgost94 = { + { + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + 0, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + MIN_KEYS_PER_CRYPT, + MAX_KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT, + { + "iteration count", + }, + { FORMAT_TAG }, + tests + }, { + init, + done, + reset, + fmt_default_prepare, + valid, + fmt_default_split, + get_binary, + get_salt, + { + iteration_count, + }, + fmt_default_source, + { + fmt_default_binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + salt_hash, + NULL, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { +#define COMMON_GET_HASH_LINK +#include "common-get-hash.h" + }, + cmp_all, + cmp_one, + cmp_exact + } +}; + +#endif /* plugin stanza */ +#endif /* HAVE_OPENCL */