diff --git a/CMakeLists.txt b/CMakeLists.txt index 453dfd99ee..296fa715dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,17 @@ set(SOURCES_CRYPTO src/crypto/common/MemoryPool.cpp src/crypto/common/Nonce.cpp src/crypto/common/VirtualMemory.cpp + src/crypto/flex/flex.cpp + src/crypto/flex/cryptonote/cryptonight_dark.c + src/crypto/flex/cryptonote/cryptonight_dark_lite.c + src/crypto/flex/cryptonote/cryptonight_fast.c + src/crypto/flex/cryptonote/cryptonight_lite.c + src/crypto/flex/cryptonote/cryptonight_turtle.c + src/crypto/flex/cryptonote/cryptonight_turtle_lite.c + src/crypto/flex/cryptonote/crypto/oaes_lib.c + src/crypto/flex/cryptonote/crypto/aesb.c + src/crypto/flex/cryptonote/crypto/hash.c + src/crypto/flex/cryptonote/crypto/c_keccak.c ) if (WITH_MO_BENCHMARK) diff --git a/src/backend/cpu/CpuConfig_gen.h b/src/backend/cpu/CpuConfig_gen.h index 988b78646b..6b4875db0d 100644 --- a/src/backend/cpu/CpuConfig_gen.h +++ b/src/backend/cpu/CpuConfig_gen.h @@ -165,7 +165,10 @@ size_t inline generate(Threads &threads, uint32_t template<> size_t inline generate(Threads& threads, uint32_t limit) { - return generate(Algorithm::kGHOSTRIDER, threads, Algorithm::GHOSTRIDER_RTM, limit); + size_t count = 0; + count += generate(Algorithm::kGHOSTRIDER, threads, Algorithm::GHOSTRIDER_RTM, limit); + count += generate(Algorithm::kFLEX, threads, Algorithm::FLEX_KCN, limit); + return count; } #endif diff --git a/src/backend/cpu/CpuWorker.cpp b/src/backend/cpu/CpuWorker.cpp index 797ff13e4b..505c7a4dec 100644 --- a/src/backend/cpu/CpuWorker.cpp +++ b/src/backend/cpu/CpuWorker.cpp @@ -37,6 +37,7 @@ #include "crypto/rx/RxDataset.h" #include "crypto/rx/RxVm.h" #include "crypto/ghostrider/ghostrider.h" +#include "crypto/flex/flex.h" #include "net/JobResults.h" @@ -168,7 +169,13 @@ bool xmrig::CpuWorker::selfTest() # ifdef XMRIG_ALGO_GHOSTRIDER if (m_algorithm.family() == Algorithm::GHOSTRIDER) { - return (N == 8) && verify(Algorithm::GHOSTRIDER_RTM, test_output_gr); + switch (m_algorithm.id()) { + case Algorithm::GHOSTRIDER_RTM: + return (N == 8) && verify(Algorithm::GHOSTRIDER_RTM, test_output_gr); + case Algorithm::FLEX_KCN: + return verify(Algorithm::FLEX_KCN, test_output_flex); + default:; + } } # endif @@ -327,11 +334,20 @@ void xmrig::CpuWorker::start() # ifdef XMRIG_ALGO_GHOSTRIDER case Algorithm::GHOSTRIDER: - if (N == 8) { - ghostrider::hash_octa(m_job.blob(), job.size(), m_hash, m_ctx, m_ghHelper); - } - else { - valid = false; + switch (job.algorithm()) { + case Algorithm::GHOSTRIDER_RTM: + if (N == 8) { + ghostrider::hash_octa(m_job.blob(), job.size(), m_hash, m_ctx, m_ghHelper); + } + else { + valid = false; + } + break; + case Algorithm::FLEX_KCN: + flex_hash(reinterpret_cast(m_job.blob()), reinterpret_cast(m_hash), m_ctx); + break; + default: + valid = false; } break; # endif @@ -398,7 +414,8 @@ template bool xmrig::CpuWorker::verify(const Algorithm &algorithm, const uint8_t *referenceValue) { # ifdef XMRIG_ALGO_GHOSTRIDER - if (algorithm == Algorithm::GHOSTRIDER_RTM) { + switch (algorithm) { + case Algorithm::GHOSTRIDER_RTM: { uint8_t blob[N * 80] = {}; for (size_t i = 0; i < N; ++i) { blob[i * 80 + 0] = static_cast(i); @@ -425,6 +442,19 @@ bool xmrig::CpuWorker::verify(const Algorithm &algorithm, const uint8_t *refe } return true; + } + case Algorithm::FLEX_KCN: { + const uint8_t header[80] = { + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0xcc, 0xa6, 0x6a, + 0x44, 0xf8, 0xbd, 0x55, 0x45, 0xc3, 0x16, 0x4a, 0x3a, 0x76, 0xda, 0x50, 0x39, 0x53, 0x28, 0xc9, 0x07, 0x56, 0x33, 0x77, + 0x5b, 0xc4, 0xc8, 0x79, 0x8f, 0xd6, 0x77, 0x2b, 0x70, 0x0d, 0x21, 0x5c, 0xf0, 0xff, 0x0f, 0x1e, 0x00, 0x00, 0x00, 0x00 + }; + char hash[32] = {}; + flex_hash(reinterpret_cast(header), hash, m_ctx); + return memcmp(referenceValue, hash, sizeof hash) == 0; + } + default:; } # endif diff --git a/src/base/crypto/Algorithm.cpp b/src/base/crypto/Algorithm.cpp index f52fca003e..4bfc5c8384 100644 --- a/src/base/crypto/Algorithm.cpp +++ b/src/base/crypto/Algorithm.cpp @@ -103,6 +103,8 @@ const char *Algorithm::kKAWPOW_RVN = "kawpow"; #ifdef XMRIG_ALGO_GHOSTRIDER const char* Algorithm::kGHOSTRIDER = "ghostrider"; const char* Algorithm::kGHOSTRIDER_RTM = "ghostrider"; +const char* Algorithm::kFLEX = "flex"; +const char* Algorithm::kFLEX_KCN = "flex"; #endif #ifdef XMRIG_ALGO_RANDOMX @@ -178,6 +180,7 @@ static const std::map kAlgorithmNames = { # ifdef XMRIG_ALGO_GHOSTRIDER ALGO_NAME(GHOSTRIDER_RTM), + ALGO_NAME(FLEX_KCN), # endif }; @@ -303,6 +306,8 @@ static const std::map kAlgorithmAlias # ifdef XMRIG_ALGO_GHOSTRIDER ALGO_ALIAS_AUTO(GHOSTRIDER_RTM), ALGO_ALIAS(GHOSTRIDER_RTM, "ghostrider/rtm"), ALGO_ALIAS(GHOSTRIDER_RTM, "gr"), + ALGO_ALIAS_AUTO(FLEX_KCN), ALGO_ALIAS(FLEX_KCN, "flex/kcn"), + ALGO_ALIAS(FLEX_KCN, "flex"), # endif }; @@ -380,7 +385,8 @@ std::vector xmrig::Algorithm::all(const std::function(m_extraNonce2Size * 2)), allocator); params.PushBack(Value(m_ntime.data(), allocator), allocator); @@ -114,7 +114,7 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result) uint64_t actual_diff; # ifdef XMRIG_ALGO_GHOSTRIDER - if (result.algorithm == Algorithm::GHOSTRIDER_RTM) { + if (result.algorithm.family() == Algorithm::GHOSTRIDER) { actual_diff = reinterpret_cast(result.result())[3]; } else @@ -202,7 +202,7 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj return; } - if (m_pool.algorithm().id() != Algorithm::GHOSTRIDER_RTM) { + if (m_pool.algorithm().family() != Algorithm::GHOSTRIDER) { return; } @@ -236,7 +236,7 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj algo = m_pool.coin().algorithm(); } - const size_t min_arr_size = (algo.id() == Algorithm::GHOSTRIDER_RTM) ? 8 : 6; + const size_t min_arr_size = (algo.family() == Algorithm::GHOSTRIDER) ? 8 : 6; if (arr.Size() < min_arr_size) { LOG_ERR("%s " RED("invalid mining.notify notification: params array has wrong size"), tag()); @@ -257,7 +257,7 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj std::stringstream s; # ifdef XMRIG_ALGO_GHOSTRIDER - if (algo.id() == Algorithm::GHOSTRIDER_RTM) { + if (algo.family() == Algorithm::GHOSTRIDER) { // Raptoreum uses Bitcoin's Stratum protocol // https://en.bitcoinwiki.org/wiki/Stratum_mining_protocol#mining.notify diff --git a/src/base/net/stratum/benchmark/BenchClient.cpp b/src/base/net/stratum/benchmark/BenchClient.cpp index a9459e1f9e..0eef26e728 100644 --- a/src/base/net/stratum/benchmark/BenchClient.cpp +++ b/src/base/net/stratum/benchmark/BenchClient.cpp @@ -49,7 +49,7 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr &benchmark, I blob.back() = '\0'; # ifdef XMRIG_ALGO_GHOSTRIDER - if (m_benchmark->algorithm() == Algorithm::GHOSTRIDER_RTM) { + if (m_benchmark->algorithm().family() == Algorithm::GHOSTRIDER) { const uint32_t q = (benchmark->rotation() / 20) & 1; const uint32_t r = benchmark->rotation() % 20; diff --git a/src/core/MoBenchmark.cpp b/src/core/MoBenchmark.cpp index b898975029..76e149e5e7 100644 --- a/src/core/MoBenchmark.cpp +++ b/src/core/MoBenchmark.cpp @@ -139,6 +139,7 @@ double MoBenchmark::get_algo_perf(Algorithm::Id algo) const { case Algorithm::RX_ARQ: return m_bench_algo_perf[BenchAlgo::RX_ARQ]; case Algorithm::RX_XLA: return m_bench_algo_perf[BenchAlgo::RX_XLA]; case Algorithm::GHOSTRIDER_RTM: return m_bench_algo_perf[BenchAlgo::GHOSTRIDER_RTM]; + case Algorithm::FLEX_KCN: return m_bench_algo_perf[BenchAlgo::FLEX_KCN]; default: return 0.0f; } } @@ -165,6 +166,7 @@ void MoBenchmark::start(const BenchAlgo bench_algo) { break; case BenchAlgo::GHOSTRIDER_RTM: + case BenchAlgo::FLEX_KCN: job.setBlob("000000208c246d0b90c3b389c4086e8b672ee040d64db5b9648527133e217fbfa48da64c0f3c0a0b0e8350800568b40fbb323ac3ccdf2965de51b9aaeb939b4f11ff81c49b74a16156ff251c00000000"); job.setDiff(1000); break; @@ -229,6 +231,15 @@ void MoBenchmark::onJobResult(const JobResult& result) { m_bench_algo_perf[m_bench_algo] = hashrate; // store hashrate result LOG_INFO("%s " BRIGHT_BLACK_BG(WHITE_BOLD_S " Algo " MAGENTA_BOLD_S "%s" WHITE_BOLD_S " hashrate: " CYAN_BOLD_S "%f "), Tags::benchmark(), Algorithm(ba2a[m_bench_algo]).name(), hashrate); run_next_bench_algo(m_bench_algo); + } else switch(m_bench_algo) { // Update GhostRider algo job to produce more accurate perf results + case BenchAlgo::GHOSTRIDER_RTM: { + Job& job = *m_bench_job[m_bench_algo]; + uint8_t* blob = job.blob(); + ++ *reinterpret_cast(blob+4); + m_controller->miner()->setJob(job, false); + break; + } + default:; } } diff --git a/src/core/MoBenchmark.h b/src/core/MoBenchmark.h index f24448df5d..8e8c01a1bd 100644 --- a/src/core/MoBenchmark.h +++ b/src/core/MoBenchmark.h @@ -34,6 +34,7 @@ class Job; class MoBenchmark : public IJobResultListener { enum BenchAlgo : int { + FLEX_KCN, // "flex" Flex GHOSTRIDER_RTM, // "ghostrider" GhostRider CN_R, // "cn/r" CryptoNightR (Monero's variant 4). CN_LITE_1, // "cn-lite/1" CryptoNight-Lite variant 1. @@ -53,6 +54,7 @@ class MoBenchmark : public IJobResultListener { }; const Algorithm::Id ba2a[BenchAlgo::MAX] = { + Algorithm::FLEX_KCN, Algorithm::GHOSTRIDER_RTM, Algorithm::CN_R, Algorithm::CN_LITE_1, diff --git a/src/core/config/ConfigTransform.cpp b/src/core/config/ConfigTransform.cpp index e882e10623..488d1bb816 100644 --- a/src/core/config/ConfigTransform.cpp +++ b/src/core/config/ConfigTransform.cpp @@ -45,7 +45,7 @@ static const char *kEnabled = "enabled"; static const char *kIntensity = "intensity"; static const char *kThreads = "threads"; #ifdef XMRIG_ALGO_KAWPOW -static const char *kKawPow = "kawpow"; +//static const char *kKawPow = "kawpow"; #endif diff --git a/src/crypto/cn/CryptoNight_test.h b/src/crypto/cn/CryptoNight_test.h index b2c742c5dc..1e6e2141c3 100644 --- a/src/crypto/cn/CryptoNight_test.h +++ b/src/crypto/cn/CryptoNight_test.h @@ -469,6 +469,11 @@ const static uint8_t test_output_gr[256] = { 0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25, 0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01 }; +// "Flex" +const static uint8_t test_output_flex[32] = { + 0x2e, 0x4f, 0x85, 0x7a, 0xa8, 0x10, 0x08, 0xc4, 0xd1, 0xfe, 0x9a, 0xcd, 0x74, 0x89, 0xe8, 0x4d, + 0x3b, 0xc5, 0x5b, 0x70, 0x54, 0xe6, 0xc0, 0x2b, 0x2c, 0x0e, 0x1b, 0x76, 0xcc, 0xa0, 0xda, 0x7b +}; #endif diff --git a/src/crypto/cn/CryptoNight_x86.h b/src/crypto/cn/CryptoNight_x86.h index 55f828b867..c208aaca8b 100644 --- a/src/crypto/cn/CryptoNight_x86.h +++ b/src/crypto/cn/CryptoNight_x86.h @@ -1238,9 +1238,7 @@ static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict_ keccak(input, size, ctx[0]->state); - if (props.half_mem()) { - ctx[0]->first_half = true; - } + if (props.half_mem()) ctx[0]->first_half = true; cn_explode_scratchpad(ctx[0]); VARIANT1_INIT(0); diff --git a/src/crypto/flex/cryptonote/crypto/aesb.c b/src/crypto/flex/cryptonote/crypto/aesb.c new file mode 100644 index 0000000000..ebe70cdcac --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/aesb.c @@ -0,0 +1,177 @@ +/* +--------------------------------------------------------------------------- +Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. + +The redistribution and use of this software (with or without changes) +is allowed without the payment of fees or royalties provided that: + + source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation. + +This software is provided 'as is' with no explicit or implied warranties +in respect of its operation, including, but not limited to, correctness +and fitness for purpose. +--------------------------------------------------------------------------- +Issue Date: 20/12/2007 +*/ + +#include + +#if defined(__cplusplus) +extern "C" +{ +#endif + +#define TABLE_ALIGN 32 +#define WPOLY 0x011b +#define N_COLS 4 +#define AES_BLOCK_SIZE 16 +#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2)) + +#if defined(_MSC_VER) +#define ALIGN __declspec(align(TABLE_ALIGN)) +#elif defined(__GNUC__) +#define ALIGN __attribute__ ((aligned(16))) +#else +#define ALIGN +#endif + +#define rf1(r,c) (r) +#define word_in(x,c) (*((uint32_t*)(x)+(c))) +#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v)) + +#define s(x,c) x[c] +#define si(y,x,c) (s(y,c) = word_in(x, c)) +#define so(y,x,c) word_out(y, c, s(x,c)) +#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3) +#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3) +#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3) +#define to_byte(x) ((x) & 0xff) +#define bval(x,n) to_byte((x) >> (8 * (n))) + +#define fwd_var(x,r,c)\ + ( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\ + : r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\ + : r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\ + : ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2))) + +#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c)) + +#define sb_data(w) {\ + w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\ + w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\ + w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\ + w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\ + w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\ + w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\ + w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\ + w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\ + w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\ + w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\ + w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\ + w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\ + w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\ + w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\ + w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\ + w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\ + w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\ + w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\ + w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\ + w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\ + w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\ + w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\ + w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\ + w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\ + w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\ + w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\ + w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\ + w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\ + w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\ + w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\ + w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\ + w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) } + +#define rc_data(w) {\ + w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\ + w(0x1b), w(0x36) } + +#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \ + ((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0)) + +#define h0(x) (x) +#define w0(p) bytes2word(p, 0, 0, 0) +#define w1(p) bytes2word(0, p, 0, 0) +#define w2(p) bytes2word(0, 0, p, 0) +#define w3(p) bytes2word(0, 0, 0, p) + +#define u0(p) bytes2word(f2(p), p, p, f3(p)) +#define u1(p) bytes2word(f3(p), f2(p), p, p) +#define u2(p) bytes2word(p, f3(p), f2(p), p) +#define u3(p) bytes2word(p, p, f3(p), f2(p)) + +#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p)) +#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p)) +#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p)) +#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p)) + +#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY)) +#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY)) +#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY)) +#define f3(x) (f2(x) ^ x) +#define f9(x) (f8(x) ^ x) +#define fb(x) (f8(x) ^ f2(x) ^ x) +#define fd(x) (f8(x) ^ f4(x) ^ x) +#define fe(x) (f8(x) ^ f4(x) ^ f2(x)) + +#define t_dec(m,n) t_##m##n +#define t_set(m,n) t_##m##n +#define t_use(m,n) t_##m##n + +#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) } + +#define four_tables(x,tab,vf,rf,c) \ + (tab[0][bval(vf(x,0,c),rf(0,c))] \ + ^ tab[1][bval(vf(x,1,c),rf(1,c))] \ + ^ tab[2][bval(vf(x,2,c),rf(2,c))] \ + ^ tab[3][bval(vf(x,3,c),rf(3,c))]) + +d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3); + +void aesb_single_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey) +{ + uint32_t b0[4], b1[4]; + const uint32_t *kp = (uint32_t *) expandedKey; + state_in(b0, in); + + round(fwd_rnd, b1, b0, kp); + + state_out(out, b1); +} + +void aesb_pseudo_round(const uint8_t *in, uint8_t *out, uint8_t *expandedKey) +{ + uint32_t b0[4], b1[4]; + const uint32_t *kp = (uint32_t *) expandedKey; + state_in(b0, in); + + round(fwd_rnd, b1, b0, kp); + round(fwd_rnd, b0, b1, kp + 1 * N_COLS); + round(fwd_rnd, b1, b0, kp + 2 * N_COLS); + round(fwd_rnd, b0, b1, kp + 3 * N_COLS); + round(fwd_rnd, b1, b0, kp + 4 * N_COLS); + round(fwd_rnd, b0, b1, kp + 5 * N_COLS); + round(fwd_rnd, b1, b0, kp + 6 * N_COLS); + round(fwd_rnd, b0, b1, kp + 7 * N_COLS); + round(fwd_rnd, b1, b0, kp + 8 * N_COLS); + round(fwd_rnd, b0, b1, kp + 9 * N_COLS); + + state_out(out, b0); +} + + +#if defined(__cplusplus) +} +#endif diff --git a/src/crypto/flex/cryptonote/crypto/c_blake256.h b/src/crypto/flex/cryptonote/crypto/c_blake256.h new file mode 100644 index 0000000000..b9c2aad0df --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/c_blake256.h @@ -0,0 +1,43 @@ +#ifndef _BLAKE256_H_ +#define _BLAKE256_H_ + +#include + +typedef struct { + uint32_t h[8], s[4], t[2]; + int buflen, nullt; + uint8_t buf[64]; +} state; + +typedef struct { + state inner; + state outer; +} hmac_state; + +void blake256_init(state *); +void blake224_init(state *); + +void blake256_update(state *, const uint8_t *, uint64_t); +void blake224_update(state *, const uint8_t *, uint64_t); + +void blake256_final(state *, uint8_t *); +void blake224_final(state *, uint8_t *); + +void blake256_hash(uint8_t *, const uint8_t *, uint64_t); +void blake224_hash(uint8_t *, const uint8_t *, uint64_t); + +/* HMAC functions: */ + +void hmac_blake256_init(hmac_state *, const uint8_t *, uint64_t); +void hmac_blake224_init(hmac_state *, const uint8_t *, uint64_t); + +void hmac_blake256_update(hmac_state *, const uint8_t *, uint64_t); +void hmac_blake224_update(hmac_state *, const uint8_t *, uint64_t); + +void hmac_blake256_final(hmac_state *, uint8_t *); +void hmac_blake224_final(hmac_state *, uint8_t *); + +void hmac_blake256_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); +void hmac_blake224_hash(uint8_t *, const uint8_t *, uint64_t, const uint8_t *, uint64_t); + +#endif /* _BLAKE256_H_ */ diff --git a/src/crypto/flex/cryptonote/crypto/c_groestl.h b/src/crypto/flex/cryptonote/crypto/c_groestl.h new file mode 100644 index 0000000000..21069093f5 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/c_groestl.h @@ -0,0 +1,56 @@ +#pragma once +/* +#include "crypto_uint8.h" +#include "crypto_uint32.h" +#include "crypto_uint64.h" +#include "crypto_hash.h" + +typedef crypto_uint8 uint8_t; +typedef crypto_uint32 uint32_t; +typedef crypto_uint64 uint64_t; +*/ +#include +#include "hash.h" + +/* some sizes (number of bytes) */ +#define ROWS 8 +#define LENGTHFIELDLEN ROWS +#define COLS512 8 + +#define SIZE512 (ROWS*COLS512) + +#define ROUNDS512 10 +#define HASH_BIT_LEN 256 + +#define ROTL32(v, n) ((((v)<<(n))|((v)>>(32-(n))))&li_32(ffffffff)) + + +#define li_32(h) 0x##h##u +#define EXT_BYTE(var,n) ((uint8_t)((uint32_t)(var) >> (8*n))) +#define u32BIG(a) \ + ((ROTL32(a,8) & li_32(00FF00FF)) | \ + (ROTL32(a,24) & li_32(FF00FF00))) + + +/* NIST API begin */ +typedef struct { + uint32_t chaining[SIZE512/sizeof(uint32_t)]; /* actual state */ + uint32_t block_counter1, + block_counter2; /* message block counter(s) */ + BitSequence buffer[SIZE512]; /* data buffer */ + int buf_ptr; /* data buffer pointer */ + int bits_in_last_byte; /* no. of message bits in last byte of + data buffer */ +} hashState; + +/*void Init(hashState*); +void Update(hashState*, const BitSequence*, DataLength); +void Final(hashState*, BitSequence*); */ +void groestl(const BitSequence*, DataLength, BitSequence*); +/* NIST API end */ + +/* +int crypto_hash(unsigned char *out, + const unsigned char *in, + unsigned long long len); +*/ diff --git a/src/crypto/flex/cryptonote/crypto/c_jh.h b/src/crypto/flex/cryptonote/crypto/c_jh.h new file mode 100644 index 0000000000..8084ec72b4 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/c_jh.h @@ -0,0 +1,20 @@ +/*This program gives the 64-bit optimized bitslice implementation of JH using ANSI C + + -------------------------------- + Performance + + Microprocessor: Intel CORE 2 processor (Core 2 Duo Mobile T6600 2.2GHz) + Operating System: 64-bit Ubuntu 10.04 (Linux kernel 2.6.32-22-generic) + Speed for long message: + 1) 45.8 cycles/byte compiler: Intel C++ Compiler 11.1 compilation option: icc -O2 + 2) 56.8 cycles/byte compiler: gcc 4.4.3 compilation option: gcc -O3 + + -------------------------------- + Last Modified: January 16, 2011 +*/ +#pragma once +#include "hash.h" + +typedef enum {SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2} HashReturn; + +HashReturn jh_hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval); diff --git a/src/crypto/flex/cryptonote/crypto/c_keccak.c b/src/crypto/flex/cryptonote/crypto/c_keccak.c new file mode 100644 index 0000000000..a7e5bb4008 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/c_keccak.c @@ -0,0 +1,112 @@ +// keccak.c +// 19-Nov-11 Markku-Juhani O. Saarinen +// A baseline Keccak (3rd round) implementation. + +#include "hash-ops.h" +#include "c_keccak.h" + +const uint64_t keccakf_rndc[24] = +{ + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 +}; + +const int keccakf_rotc[24] = +{ + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 +}; + +const int keccakf_piln[24] = +{ + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 +}; + +// update the state with given number of rounds + +void keccakf(uint64_t st[25], int rounds) +{ + int i, j, round; + uint64_t t, bc[5]; + + for (round = 0; round < rounds; round++) { + + // Theta + for (i = 0; i < 5; i++) + bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]; + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) + st[j + i] ^= t; + } + + // Rho Pi + t = st[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = st[j]; + st[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + + // Chi + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) + bc[i] = st[j + i]; + for (i = 0; i < 5; i++) + st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; + } + + // Iota + st[0] ^= keccakf_rndc[round]; + } +} + +// compute a keccak hash (md) of given byte length from "in" +typedef uint64_t state_t[25]; + +int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen) +{ + state_t st; + uint8_t temp[144]; + int i, rsiz, rsizw; + + rsiz = sizeof(state_t) == mdlen ? HASH_DATA_AREA : 200 - 2 * mdlen; + rsizw = rsiz / 8; + + memset(st, 0, sizeof(st)); + + for ( ; inlen >= rsiz; inlen -= rsiz, in += rsiz) { + for (i = 0; i < rsizw; i++) + st[i] ^= ((uint64_t *) in)[i]; + keccakf(st, KECCAK_ROUNDS); + } + + // last block and padding + memcpy(temp, in, inlen); + temp[inlen++] = 1; + memset(temp + inlen, 0, rsiz - inlen); + temp[rsiz - 1] |= 0x80; + + for (i = 0; i < rsizw; i++) + st[i] ^= ((uint64_t *) temp)[i]; + + keccakf(st, KECCAK_ROUNDS); + + memcpy(md, st, mdlen); + + return 0; +} + +void keccak1600(const uint8_t *in, int inlen, uint8_t *md) +{ + keccak(in, inlen, md, sizeof(state_t)); +} diff --git a/src/crypto/flex/cryptonote/crypto/c_keccak.h b/src/crypto/flex/cryptonote/crypto/c_keccak.h new file mode 100644 index 0000000000..4f7f85729a --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/c_keccak.h @@ -0,0 +1,26 @@ +// keccak.h +// 19-Nov-11 Markku-Juhani O. Saarinen + +#ifndef KECCAK_H +#define KECCAK_H + +#include +#include + +#ifndef KECCAK_ROUNDS +#define KECCAK_ROUNDS 24 +#endif + +#ifndef ROTL64 +#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) +#endif + +// compute a keccak hash (md) of given byte length from "in" +int keccak(const uint8_t *in, int inlen, uint8_t *md, int mdlen); + +// update the state +void keccakf(uint64_t st[25], int norounds); + +void keccak1600(const uint8_t *in, int inlen, uint8_t *md); + +#endif diff --git a/src/crypto/flex/cryptonote/crypto/crypto.h b/src/crypto/flex/cryptonote/crypto/crypto.h new file mode 100644 index 0000000000..61641fbcfc --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/crypto.h @@ -0,0 +1,186 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#pragma once + +#include +#include +#include + +#include "common/pod-class.h" +#include "generic-ops.h" +#include "hash.h" + +namespace crypto { + + extern "C" { +#include "random.h" + } + + extern std::mutex random_lock; + +#pragma pack(push, 1) + POD_CLASS ec_point { + char data[32]; + }; + + POD_CLASS ec_scalar { + char data[32]; + }; + + POD_CLASS public_key: ec_point { + friend class crypto_ops; + }; + + POD_CLASS secret_key: ec_scalar { + friend class crypto_ops; + }; + + POD_CLASS key_derivation: ec_point { + friend class crypto_ops; + }; + + POD_CLASS key_image: ec_point { + friend class crypto_ops; + }; + + POD_CLASS signature { + ec_scalar c, r; + friend class crypto_ops; + }; +#pragma pack(pop) + + static_assert(sizeof(ec_point) == 32 && sizeof(ec_scalar) == 32 && + sizeof(public_key) == 32 && sizeof(secret_key) == 32 && + sizeof(key_derivation) == 32 && sizeof(key_image) == 32 && + sizeof(signature) == 64, "Invalid structure size"); + + class crypto_ops { + crypto_ops(); + crypto_ops(const crypto_ops &); + void operator=(const crypto_ops &); + ~crypto_ops(); + + static void generate_keys(public_key &, secret_key &); + friend void generate_keys(public_key &, secret_key &); + static bool check_key(const public_key &); + friend bool check_key(const public_key &); + static bool secret_key_to_public_key(const secret_key &, public_key &); + friend bool secret_key_to_public_key(const secret_key &, public_key &); + static bool generate_key_derivation(const public_key &, const secret_key &, key_derivation &); + friend bool generate_key_derivation(const public_key &, const secret_key &, key_derivation &); + static bool derive_public_key(const key_derivation &, std::size_t, const public_key &, public_key &); + friend bool derive_public_key(const key_derivation &, std::size_t, const public_key &, public_key &); + static void derive_secret_key(const key_derivation &, std::size_t, const secret_key &, secret_key &); + friend void derive_secret_key(const key_derivation &, std::size_t, const secret_key &, secret_key &); + static void generate_signature(const hash &, const public_key &, const secret_key &, signature &); + friend void generate_signature(const hash &, const public_key &, const secret_key &, signature &); + static bool check_signature(const hash &, const public_key &, const signature &); + friend bool check_signature(const hash &, const public_key &, const signature &); + static void generate_key_image(const public_key &, const secret_key &, key_image &); + friend void generate_key_image(const public_key &, const secret_key &, key_image &); + static void generate_ring_signature(const hash &, const key_image &, + const public_key *const *, std::size_t, const secret_key &, std::size_t, signature *); + friend void generate_ring_signature(const hash &, const key_image &, + const public_key *const *, std::size_t, const secret_key &, std::size_t, signature *); + static bool check_ring_signature(const hash &, const key_image &, + const public_key *const *, std::size_t, const signature *); + friend bool check_ring_signature(const hash &, const key_image &, + const public_key *const *, std::size_t, const signature *); + }; + + /* Generate a value filled with random bytes. + */ + template + typename std::enable_if::value, T>::type rand() { + typename std::remove_cv::type res; + std::lock_guard lock(random_lock); + generate_random_bytes(sizeof(T), &res); + return res; + } + + /* Generate a new key pair + */ + inline void generate_keys(public_key &pub, secret_key &sec) { + crypto_ops::generate_keys(pub, sec); + } + + /* Check a public key. Returns true if it is valid, false otherwise. + */ + inline bool check_key(const public_key &key) { + return crypto_ops::check_key(key); + } + + /* Checks a private key and computes the corresponding public key. + */ + inline bool secret_key_to_public_key(const secret_key &sec, public_key &pub) { + return crypto_ops::secret_key_to_public_key(sec, pub); + } + + /* To generate an ephemeral key used to send money to: + * * The sender generates a new key pair, which becomes the transaction key. The public transaction key is included in "extra" field. + * * Both the sender and the receiver generate key derivation from the transaction key, the receivers' "view" key and the output index. + * * The sender uses key derivation and the receivers' "spend" key to derive an ephemeral public key. + * * The receiver can either derive the public key (to check that the transaction is addressed to him) or the private key (to spend the money). + */ + inline bool generate_key_derivation(const public_key &key1, const secret_key &key2, key_derivation &derivation) { + return crypto_ops::generate_key_derivation(key1, key2, derivation); + } + inline bool derive_public_key(const key_derivation &derivation, std::size_t output_index, + const public_key &base, public_key &derived_key) { + return crypto_ops::derive_public_key(derivation, output_index, base, derived_key); + } + inline void derive_secret_key(const key_derivation &derivation, std::size_t output_index, + const secret_key &base, secret_key &derived_key) { + crypto_ops::derive_secret_key(derivation, output_index, base, derived_key); + } + + /* Generation and checking of a standard signature. + */ + inline void generate_signature(const hash &prefix_hash, const public_key &pub, const secret_key &sec, signature &sig) { + crypto_ops::generate_signature(prefix_hash, pub, sec, sig); + } + inline bool check_signature(const hash &prefix_hash, const public_key &pub, const signature &sig) { + return crypto_ops::check_signature(prefix_hash, pub, sig); + } + + /* To send money to a key: + * * The sender generates an ephemeral key and includes it in transaction output. + * * To spend the money, the receiver generates a key image from it. + * * Then he selects a bunch of outputs, including the one he spends, and uses them to generate a ring signature. + * To check the signature, it is necessary to collect all the keys that were used to generate it. To detect double spends, it is necessary to check that each key image is used at most once. + */ + inline void generate_key_image(const public_key &pub, const secret_key &sec, key_image &image) { + crypto_ops::generate_key_image(pub, sec, image); + } + inline void generate_ring_signature(const hash &prefix_hash, const key_image &image, + const public_key *const *pubs, std::size_t pubs_count, + const secret_key &sec, std::size_t sec_index, + signature *sig) { + crypto_ops::generate_ring_signature(prefix_hash, image, pubs, pubs_count, sec, sec_index, sig); + } + inline bool check_ring_signature(const hash &prefix_hash, const key_image &image, + const public_key *const *pubs, std::size_t pubs_count, + const signature *sig) { + return crypto_ops::check_ring_signature(prefix_hash, image, pubs, pubs_count, sig); + } + + /* Variants with vector parameters. + */ + inline void generate_ring_signature(const hash &prefix_hash, const key_image &image, + const std::vector &pubs, + const secret_key &sec, std::size_t sec_index, + signature *sig) { + generate_ring_signature(prefix_hash, image, pubs.data(), pubs.size(), sec, sec_index, sig); + } + inline bool check_ring_signature(const hash &prefix_hash, const key_image &image, + const std::vector &pubs, + const signature *sig) { + return check_ring_signature(prefix_hash, image, pubs.data(), pubs.size(), sig); + } +} + +CRYPTO_MAKE_COMPARABLE(public_key) +CRYPTO_MAKE_HASHABLE(key_image) +CRYPTO_MAKE_COMPARABLE(signature) diff --git a/src/crypto/flex/cryptonote/crypto/hash-ops.h b/src/crypto/flex/cryptonote/crypto/hash-ops.h new file mode 100644 index 0000000000..60950cdc37 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/hash-ops.h @@ -0,0 +1,57 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#pragma once + +#if !defined(__cplusplus) + +#include +#include +#include +#include + +#include "int-util.h" + +static inline void *padd(void *p, size_t i) { + return (char *) p + i; +} + +static inline const void *cpadd(const void *p, size_t i) { + return (const char *) p + i; +} + +static inline void place_length(uint8_t *buffer, size_t bufsize, size_t length) { + if (sizeof(size_t) == 4) { + *(uint32_t *) padd(buffer, bufsize - 4) = swap32be(length); + } else { + *(uint64_t *) padd(buffer, bufsize - 8) = swap64be(length); + } +} + +#pragma pack(push, 1) +union hash_state { + uint8_t b[200]; + uint64_t w[25]; +}; +#pragma pack(pop) + +void hash_permutation(union hash_state *state); +void hash_process(union hash_state *state, const uint8_t *buf, size_t count); + +#endif + +enum { + HASH_SIZE = 64, + HASH_DATA_AREA = 136 +}; + +void cn_fast_hash(const void *data, size_t length, char *hash); +void cn_slow_hash(const void *data, size_t length, char *hash); + +void hash_extra_blake(const void *data, size_t length, char *hash); +void hash_extra_groestl(const void *data, size_t length, char *hash); +void hash_extra_jh(const void *data, size_t length, char *hash); +void hash_extra_skein(const void *data, size_t length, char *hash); + +void tree_hash(const char (*hashes)[HASH_SIZE], size_t count, char *root_hash); diff --git a/src/crypto/flex/cryptonote/crypto/hash.c b/src/crypto/flex/cryptonote/crypto/hash.c new file mode 100644 index 0000000000..f3a16f0c1d --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/hash.c @@ -0,0 +1,24 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +#include "hash-ops.h" +#include "c_keccak.h" + +void hash_permutation(union hash_state *state) { + keccakf((uint64_t*)state, 24); +} + +void hash_process(union hash_state *state, const uint8_t *buf, size_t count) { + keccak1600(buf, count, (uint8_t*)state); +} + +void cn_fast_hash(const void *data, size_t length, char *hash) { + union hash_state state; + hash_process(&state, data, length); + memcpy(hash, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/crypto/hash.h b/src/crypto/flex/cryptonote/crypto/hash.h new file mode 100644 index 0000000000..b5ee22114c --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/hash.h @@ -0,0 +1,22 @@ +#pragma once + +#include "hash-ops.h" + +typedef unsigned char BitSequence; +typedef unsigned long long DataLength; + +#ifdef __cplusplus + +#include + +typedef std::string blobdata; + +namespace crypto { +#pragma pack(push, 1) + class hash { + char data[HASH_SIZE]; + }; +#pragma pack(pop) +} + +#endif diff --git a/src/crypto/flex/cryptonote/crypto/int-util.h b/src/crypto/flex/cryptonote/crypto/int-util.h new file mode 100644 index 0000000000..90980ba869 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/int-util.h @@ -0,0 +1,230 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#pragma once + +#include +#include +#include +#include + +/* + * Create GNU compatible endian macros. We use the values for __LITTLE_ENDIAN + * and __BIG_ENDIAN based on endian.h. + */ +#ifdef __sun +#include +#define LITTLE_ENDIAN 1234 +#define BIG_ENDIAN 4321 +#ifdef _LITTLE_ENDIAN +#define BYTE_ORDER LITTLE_ENDIAN +#else +#define BYTE_ORDER BIG_ENDIAN +#endif /* _LITTLE_ENDIAN */ +#endif /* __sun */ + +#if defined(_MSC_VER) +#include + +//instead of #include +// assume little-endian on Windows +#define LITTLE_ENDIAN 1234 +#define BIG_ENDIAN 4321 +#define BYTE_ORDER LITTLE_ENDIAN + +static inline uint32_t rol32(uint32_t x, int r) { + static_assert(sizeof(uint32_t) == sizeof(unsigned int), "this code assumes 32-bit integers"); + return _rotl(x, r); +} + +static inline uint64_t rol64(uint64_t x, int r) { + return _rotl64(x, r); +} + +#else +#include + +static inline uint32_t rol32(uint32_t x, int r) { + return (x << (r & 31)) | (x >> (-r & 31)); +} + +static inline uint64_t rol64(uint64_t x, int r) { + return (x << (r & 63)) | (x >> (-r & 63)); +} + +#endif + +static inline uint64_t hi_dword(uint64_t val) { + return val >> 32; +} + +static inline uint64_t lo_dword(uint64_t val) { + return val & 0xFFFFFFFF; +} + +static inline uint64_t mul128(uint64_t multiplier, uint64_t multiplicand, uint64_t* product_hi) { + // multiplier = ab = a * 2^32 + b + // multiplicand = cd = c * 2^32 + d + // ab * cd = a * c * 2^64 + (a * d + b * c) * 2^32 + b * d + uint64_t a = hi_dword(multiplier); + uint64_t b = lo_dword(multiplier); + uint64_t c = hi_dword(multiplicand); + uint64_t d = lo_dword(multiplicand); + + uint64_t ac = a * c; + uint64_t ad = a * d; + uint64_t bc = b * c; + uint64_t bd = b * d; + + uint64_t adbc = ad + bc; + uint64_t adbc_carry = adbc < ad ? 1 : 0; + + // multiplier * multiplicand = product_hi * 2^64 + product_lo + uint64_t product_lo = bd + (adbc << 32); + uint64_t product_lo_carry = product_lo < bd ? 1 : 0; + *product_hi = ac + (adbc >> 32) + (adbc_carry << 32) + product_lo_carry; + assert(ac <= *product_hi); + + return product_lo; +} + +static inline uint64_t div_with_reminder(uint64_t dividend, uint32_t divisor, uint32_t* remainder) { + dividend |= ((uint64_t)*remainder) << 32; + *remainder = dividend % divisor; + return dividend / divisor; +} + +// Long division with 2^32 base +static inline uint32_t div128_32(uint64_t dividend_hi, uint64_t dividend_lo, uint32_t divisor, uint64_t* quotient_hi, uint64_t* quotient_lo) { + uint64_t dividend_dwords[4]; + uint32_t remainder = 0; + + dividend_dwords[3] = hi_dword(dividend_hi); + dividend_dwords[2] = lo_dword(dividend_hi); + dividend_dwords[1] = hi_dword(dividend_lo); + dividend_dwords[0] = lo_dword(dividend_lo); + + *quotient_hi = div_with_reminder(dividend_dwords[3], divisor, &remainder) << 32; + *quotient_hi |= div_with_reminder(dividend_dwords[2], divisor, &remainder); + *quotient_lo = div_with_reminder(dividend_dwords[1], divisor, &remainder) << 32; + *quotient_lo |= div_with_reminder(dividend_dwords[0], divisor, &remainder); + + return remainder; +} + +#define IDENT32(x) ((uint32_t) (x)) +#define IDENT64(x) ((uint64_t) (x)) + +#define SWAP32(x) ((((uint32_t) (x) & 0x000000ff) << 24) | \ + (((uint32_t) (x) & 0x0000ff00) << 8) | \ + (((uint32_t) (x) & 0x00ff0000) >> 8) | \ + (((uint32_t) (x) & 0xff000000) >> 24)) +#define SWAP64(x) ((((uint64_t) (x) & 0x00000000000000ff) << 56) | \ + (((uint64_t) (x) & 0x000000000000ff00) << 40) | \ + (((uint64_t) (x) & 0x0000000000ff0000) << 24) | \ + (((uint64_t) (x) & 0x00000000ff000000) << 8) | \ + (((uint64_t) (x) & 0x000000ff00000000) >> 8) | \ + (((uint64_t) (x) & 0x0000ff0000000000) >> 24) | \ + (((uint64_t) (x) & 0x00ff000000000000) >> 40) | \ + (((uint64_t) (x) & 0xff00000000000000) >> 56)) + +static inline uint32_t ident32(uint32_t x) { return x; } +static inline uint64_t ident64(uint64_t x) { return x; } + +static inline uint32_t swap32(uint32_t x) { + x = ((x & 0x00ff00ff) << 8) | ((x & 0xff00ff00) >> 8); + return (x << 16) | (x >> 16); +} +static inline uint64_t swap64(uint64_t x) { + x = ((x & 0x00ff00ff00ff00ff) << 8) | ((x & 0xff00ff00ff00ff00) >> 8); + x = ((x & 0x0000ffff0000ffff) << 16) | ((x & 0xffff0000ffff0000) >> 16); + return (x << 32) | (x >> 32); +} + +#if defined(__GNUC__) +#define UNUSED __attribute__((unused)) +#else +#define UNUSED +#endif +static inline void mem_inplace_ident(void *mem UNUSED, size_t n UNUSED) { } +#undef UNUSED + +static inline void mem_inplace_swap32(void *mem, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint32_t *) mem)[i] = swap32(((const uint32_t *) mem)[i]); + } +} +static inline void mem_inplace_swap64(void *mem, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint64_t *) mem)[i] = swap64(((const uint64_t *) mem)[i]); + } +} + +static inline void memcpy_ident32(void *dst, const void *src, size_t n) { + memcpy(dst, src, 4 * n); +} +static inline void memcpy_ident64(void *dst, const void *src, size_t n) { + memcpy(dst, src, 8 * n); +} + +static inline void memcpy_swap32(void *dst, const void *src, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint32_t *) dst)[i] = swap32(((const uint32_t *) src)[i]); + } +} +static inline void memcpy_swap64(void *dst, const void *src, size_t n) { + size_t i; + for (i = 0; i < n; i++) { + ((uint64_t *) dst)[i] = swap64(((const uint64_t *) src)[i]); + } +} + +#if !defined(BYTE_ORDER) || !defined(LITTLE_ENDIAN) || !defined(BIG_ENDIAN) +#if __STDC_VERSION__ - 0 >= 201112L +static_assert(false, "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled"); +#else +#error "BYTE_ORDER is undefined. Perhaps, GNU extensions are not enabled" +#endif +#endif + +#if BYTE_ORDER == LITTLE_ENDIAN +#define SWAP32LE IDENT32 +#define SWAP32BE SWAP32 +#define swap32le ident32 +#define swap32be swap32 +#define mem_inplace_swap32le mem_inplace_ident +#define mem_inplace_swap32be mem_inplace_swap32 +#define memcpy_swap32le memcpy_ident32 +#define memcpy_swap32be memcpy_swap32 +#define SWAP64LE IDENT64 +#define SWAP64BE SWAP64 +#define swap64le ident64 +#define swap64be swap64 +#define mem_inplace_swap64le mem_inplace_ident +#define mem_inplace_swap64be mem_inplace_swap64 +#define memcpy_swap64le memcpy_ident64 +#define memcpy_swap64be memcpy_swap64 +#endif + +#if BYTE_ORDER == BIG_ENDIAN +#define SWAP32BE IDENT32 +#define SWAP32LE SWAP32 +#define swap32be ident32 +#define swap32le swap32 +#define mem_inplace_swap32be mem_inplace_ident +#define mem_inplace_swap32le mem_inplace_swap32 +#define memcpy_swap32be memcpy_ident32 +#define memcpy_swap32le memcpy_swap32 +#define SWAP64BE IDENT64 +#define SWAP64LE SWAP64 +#define swap64be ident64 +#define swap64le swap64 +#define mem_inplace_swap64be mem_inplace_ident +#define mem_inplace_swap64le mem_inplace_swap64 +#define memcpy_swap64be memcpy_ident64 +#define memcpy_swap64le memcpy_swap64 +#endif diff --git a/src/crypto/flex/cryptonote/crypto/oaes_config.h b/src/crypto/flex/cryptonote/crypto/oaes_config.h new file mode 100644 index 0000000000..3fc0e1be5c --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/oaes_config.h @@ -0,0 +1,50 @@ +/* + * --------------------------------------------------------------------------- + * OpenAES License + * --------------------------------------------------------------------------- + * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------------- + */ + +#ifndef _OAES_CONFIG_H +#define _OAES_CONFIG_H + +#ifdef __cplusplus +extern "C" { +#endif + +//#ifndef OAES_HAVE_ISAAC +//#define OAES_HAVE_ISAAC 1 +//#endif // OAES_HAVE_ISAAC + +//#ifndef OAES_DEBUG +//#define OAES_DEBUG 0 +//#endif // OAES_DEBUG + +#ifdef __cplusplus +} +#endif + +#endif // _OAES_CONFIG_H diff --git a/src/crypto/flex/cryptonote/crypto/oaes_lib.c b/src/crypto/flex/cryptonote/crypto/oaes_lib.c new file mode 100644 index 0000000000..29559afc63 --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/oaes_lib.c @@ -0,0 +1,1468 @@ +/* + * --------------------------------------------------------------------------- + * OpenAES License + * --------------------------------------------------------------------------- + * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------------- + */ + +static const char _NR[] = { + 0x4e,0x61,0x62,0x69,0x6c,0x20,0x53,0x2e,0x20, + 0x41,0x6c,0x20,0x52,0x61,0x6d,0x6c,0x69,0x00 +}; + +#include +#include +#include +#ifdef __APPLE__ +#include +#else +#include +#endif +#include +#include +#include + +#ifdef WIN32 +#include +#else +#include +#include +#endif + +#include "oaes_config.h" +#include "oaes_lib.h" + +#ifdef OAES_HAVE_ISAAC +#include "rand.h" +#endif // OAES_HAVE_ISAAC + +#define OAES_RKEY_LEN 4 +#define OAES_COL_LEN 4 +#define OAES_ROUND_BASE 7 + +// the block is padded +#define OAES_FLAG_PAD 0x01 + +#ifndef min +# define min(a,b) (((a)<(b)) ? (a) : (b)) +#endif /* min */ + +// "OAES<8-bit header version><8-bit type><16-bit options><8-bit flags><56-bit reserved>" +static uint8_t oaes_header[OAES_BLOCK_SIZE] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ 0x4f, 0x41, 0x45, 0x53, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; +static uint8_t oaes_gf_8[] = { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 }; + +static uint8_t oaes_sub_byte_value[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 }, + /*1*/ { 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 }, + /*2*/ { 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 }, + /*3*/ { 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 }, + /*4*/ { 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 }, + /*5*/ { 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf }, + /*6*/ { 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 }, + /*7*/ { 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 }, + /*8*/ { 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 }, + /*9*/ { 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb }, + /*a*/ { 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 }, + /*b*/ { 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 }, + /*c*/ { 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a }, + /*d*/ { 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e }, + /*e*/ { 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf }, + /*f*/ { 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 }, +}; + +static uint8_t oaes_inv_sub_byte_value[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb }, + /*1*/ { 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb }, + /*2*/ { 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e }, + /*3*/ { 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 }, + /*4*/ { 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 }, + /*5*/ { 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 }, + /*6*/ { 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 }, + /*7*/ { 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b }, + /*8*/ { 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 }, + /*9*/ { 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e }, + /*a*/ { 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b }, + /*b*/ { 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 }, + /*c*/ { 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f }, + /*d*/ { 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef }, + /*e*/ { 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 }, + /*f*/ { 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d }, +}; + +static uint8_t oaes_gf_mul_2[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e }, + /*1*/ { 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e }, + /*2*/ { 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e }, + /*3*/ { 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e }, + /*4*/ { 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e }, + /*5*/ { 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe }, + /*6*/ { 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde }, + /*7*/ { 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe }, + /*8*/ { 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05 }, + /*9*/ { 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25 }, + /*a*/ { 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45 }, + /*b*/ { 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65 }, + /*c*/ { 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85 }, + /*d*/ { 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5 }, + /*e*/ { 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5 }, + /*f*/ { 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 }, +}; + +static uint8_t oaes_gf_mul_3[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 }, + /*1*/ { 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21 }, + /*2*/ { 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71 }, + /*3*/ { 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41 }, + /*4*/ { 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1 }, + /*5*/ { 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1 }, + /*6*/ { 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1 }, + /*7*/ { 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81 }, + /*8*/ { 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a }, + /*9*/ { 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba }, + /*a*/ { 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea }, + /*b*/ { 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda }, + /*c*/ { 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a }, + /*d*/ { 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a }, + /*e*/ { 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a }, + /*f*/ { 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a }, +}; + +static uint8_t oaes_gf_mul_9[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 }, + /*1*/ { 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7 }, + /*2*/ { 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c }, + /*3*/ { 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc }, + /*4*/ { 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01 }, + /*5*/ { 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91 }, + /*6*/ { 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a }, + /*7*/ { 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa }, + /*8*/ { 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b }, + /*9*/ { 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b }, + /*a*/ { 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0 }, + /*b*/ { 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30 }, + /*c*/ { 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed }, + /*d*/ { 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d }, + /*e*/ { 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6 }, + /*f*/ { 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 }, +}; + +static uint8_t oaes_gf_mul_b[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 }, + /*1*/ { 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9 }, + /*2*/ { 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12 }, + /*3*/ { 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2 }, + /*4*/ { 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f }, + /*5*/ { 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f }, + /*6*/ { 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4 }, + /*7*/ { 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54 }, + /*8*/ { 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e }, + /*9*/ { 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e }, + /*a*/ { 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5 }, + /*b*/ { 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55 }, + /*c*/ { 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68 }, + /*d*/ { 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8 }, + /*e*/ { 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13 }, + /*f*/ { 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 }, +}; + +static uint8_t oaes_gf_mul_d[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b }, + /*1*/ { 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b }, + /*2*/ { 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0 }, + /*3*/ { 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20 }, + /*4*/ { 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26 }, + /*5*/ { 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6 }, + /*6*/ { 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d }, + /*7*/ { 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d }, + /*8*/ { 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91 }, + /*9*/ { 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41 }, + /*a*/ { 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a }, + /*b*/ { 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa }, + /*c*/ { 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc }, + /*d*/ { 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c }, + /*e*/ { 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47 }, + /*f*/ { 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 }, +}; + +static uint8_t oaes_gf_mul_e[16][16] = { + // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f, + /*0*/ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a }, + /*1*/ { 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba }, + /*2*/ { 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81 }, + /*3*/ { 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61 }, + /*4*/ { 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7 }, + /*5*/ { 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17 }, + /*6*/ { 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c }, + /*7*/ { 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc }, + /*8*/ { 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b }, + /*9*/ { 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb }, + /*a*/ { 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0 }, + /*b*/ { 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20 }, + /*c*/ { 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6 }, + /*d*/ { 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56 }, + /*e*/ { 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d }, + /*f*/ { 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d }, +}; + +static OAES_RET oaes_sub_byte( uint8_t * byte ) +{ + size_t _x, _y; + + if( NULL == byte ) + return OAES_RET_ARG1; + + _x = _y = *byte; + _x &= 0x0f; + _y &= 0xf0; + _y >>= 4; + *byte = oaes_sub_byte_value[_y][_x]; + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_inv_sub_byte( uint8_t * byte ) +{ + size_t _x, _y; + + if( NULL == byte ) + return OAES_RET_ARG1; + + _x = _y = *byte; + _x &= 0x0f; + _y &= 0xf0; + _y >>= 4; + *byte = oaes_inv_sub_byte_value[_y][_x]; + + return OAES_RET_SUCCESS; +} +/* +static OAES_RET oaes_word_rot_right( uint8_t word[OAES_COL_LEN] ) +{ + uint8_t _temp[OAES_COL_LEN]; + + if( NULL == word ) + return OAES_RET_ARG1; + + memcpy( _temp + 1, word, OAES_COL_LEN - 1 ); + _temp[0] = word[OAES_COL_LEN - 1]; + memcpy( word, _temp, OAES_COL_LEN ); + + return OAES_RET_SUCCESS; +} +*/ +static OAES_RET oaes_word_rot_left( uint8_t word[OAES_COL_LEN] ) +{ + uint8_t _temp[OAES_COL_LEN]; + + if( NULL == word ) + return OAES_RET_ARG1; + + memcpy( _temp, word + 1, OAES_COL_LEN - 1 ); + _temp[OAES_COL_LEN - 1] = word[0]; + memcpy( word, _temp, OAES_COL_LEN ); + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_shift_rows( uint8_t block[OAES_BLOCK_SIZE] ) +{ + uint8_t _temp[OAES_BLOCK_SIZE]; + + if( NULL == block ) + return OAES_RET_ARG1; + + _temp[0x00] = block[0x00]; + _temp[0x01] = block[0x05]; + _temp[0x02] = block[0x0a]; + _temp[0x03] = block[0x0f]; + _temp[0x04] = block[0x04]; + _temp[0x05] = block[0x09]; + _temp[0x06] = block[0x0e]; + _temp[0x07] = block[0x03]; + _temp[0x08] = block[0x08]; + _temp[0x09] = block[0x0d]; + _temp[0x0a] = block[0x02]; + _temp[0x0b] = block[0x07]; + _temp[0x0c] = block[0x0c]; + _temp[0x0d] = block[0x01]; + _temp[0x0e] = block[0x06]; + _temp[0x0f] = block[0x0b]; + memcpy( block, _temp, OAES_BLOCK_SIZE ); + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_inv_shift_rows( uint8_t block[OAES_BLOCK_SIZE] ) +{ + uint8_t _temp[OAES_BLOCK_SIZE]; + + if( NULL == block ) + return OAES_RET_ARG1; + + _temp[0x00] = block[0x00]; + _temp[0x01] = block[0x0d]; + _temp[0x02] = block[0x0a]; + _temp[0x03] = block[0x07]; + _temp[0x04] = block[0x04]; + _temp[0x05] = block[0x01]; + _temp[0x06] = block[0x0e]; + _temp[0x07] = block[0x0b]; + _temp[0x08] = block[0x08]; + _temp[0x09] = block[0x05]; + _temp[0x0a] = block[0x02]; + _temp[0x0b] = block[0x0f]; + _temp[0x0c] = block[0x0c]; + _temp[0x0d] = block[0x09]; + _temp[0x0e] = block[0x06]; + _temp[0x0f] = block[0x03]; + memcpy( block, _temp, OAES_BLOCK_SIZE ); + + return OAES_RET_SUCCESS; +} + +static uint8_t oaes_gf_mul(uint8_t left, uint8_t right) +{ + size_t _x, _y; + + _x = _y = left; + _x &= 0x0f; + _y &= 0xf0; + _y >>= 4; + + switch( right ) + { + case 0x02: + return oaes_gf_mul_2[_y][_x]; + break; + case 0x03: + return oaes_gf_mul_3[_y][_x]; + break; + case 0x09: + return oaes_gf_mul_9[_y][_x]; + break; + case 0x0b: + return oaes_gf_mul_b[_y][_x]; + break; + case 0x0d: + return oaes_gf_mul_d[_y][_x]; + break; + case 0x0e: + return oaes_gf_mul_e[_y][_x]; + break; + default: + return left; + break; + } +} + +static OAES_RET oaes_mix_cols( uint8_t word[OAES_COL_LEN] ) +{ + uint8_t _temp[OAES_COL_LEN]; + + if( NULL == word ) + return OAES_RET_ARG1; + + _temp[0] = oaes_gf_mul(word[0], 0x02) ^ oaes_gf_mul( word[1], 0x03 ) ^ + word[2] ^ word[3]; + _temp[1] = word[0] ^ oaes_gf_mul( word[1], 0x02 ) ^ + oaes_gf_mul( word[2], 0x03 ) ^ word[3]; + _temp[2] = word[0] ^ word[1] ^ + oaes_gf_mul( word[2], 0x02 ) ^ oaes_gf_mul( word[3], 0x03 ); + _temp[3] = oaes_gf_mul( word[0], 0x03 ) ^ word[1] ^ + word[2] ^ oaes_gf_mul( word[3], 0x02 ); + memcpy( word, _temp, OAES_COL_LEN ); + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_inv_mix_cols( uint8_t word[OAES_COL_LEN] ) +{ + uint8_t _temp[OAES_COL_LEN]; + + if( NULL == word ) + return OAES_RET_ARG1; + + _temp[0] = oaes_gf_mul( word[0], 0x0e ) ^ oaes_gf_mul( word[1], 0x0b ) ^ + oaes_gf_mul( word[2], 0x0d ) ^ oaes_gf_mul( word[3], 0x09 ); + _temp[1] = oaes_gf_mul( word[0], 0x09 ) ^ oaes_gf_mul( word[1], 0x0e ) ^ + oaes_gf_mul( word[2], 0x0b ) ^ oaes_gf_mul( word[3], 0x0d ); + _temp[2] = oaes_gf_mul( word[0], 0x0d ) ^ oaes_gf_mul( word[1], 0x09 ) ^ + oaes_gf_mul( word[2], 0x0e ) ^ oaes_gf_mul( word[3], 0x0b ); + _temp[3] = oaes_gf_mul( word[0], 0x0b ) ^ oaes_gf_mul( word[1], 0x0d ) ^ + oaes_gf_mul( word[2], 0x09 ) ^ oaes_gf_mul( word[3], 0x0e ); + memcpy( word, _temp, OAES_COL_LEN ); + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_sprintf( + char * buf, size_t * buf_len, const uint8_t * data, size_t data_len ) +{ + size_t _i, _buf_len_in; + char _temp[4]; + + if( NULL == buf_len ) + return OAES_RET_ARG2; + + _buf_len_in = *buf_len; + *buf_len = data_len * 3 + data_len / OAES_BLOCK_SIZE + 1; + + if( NULL == buf ) + return OAES_RET_SUCCESS; + + if( *buf_len > _buf_len_in ) + return OAES_RET_BUF; + + if( NULL == data ) + return OAES_RET_ARG3; + + strcpy( buf, "" ); + + for( _i = 0; _i < data_len; _i++ ) + { + sprintf( _temp, "%02x ", data[_i] ); + strcat( buf, _temp ); + if( _i && 0 == ( _i + 1 ) % OAES_BLOCK_SIZE ) + strcat( buf, "\n" ); + } + + return OAES_RET_SUCCESS; +} + +#ifdef OAES_HAVE_ISAAC +static void oaes_get_seed( char buf[RANDSIZ + 1] ) +{ + struct timeb timer; + struct tm *gmTimer; + char * _test = NULL; + + ftime (&timer); + gmTimer = gmtime( &timer.time ); + _test = (char *) calloc( sizeof( char ), timer.millitm ); + sprintf( buf, "%04d%02d%02d%02d%02d%02d%03d%p%d", + gmTimer->tm_year + 1900, gmTimer->tm_mon + 1, gmTimer->tm_mday, + gmTimer->tm_hour, gmTimer->tm_min, gmTimer->tm_sec, timer.millitm, + _test + timer.millitm, getpid() ); + + if( _test ) + free( _test ); +} +#else +static uint32_t oaes_get_seed(void) +{ + struct timeb timer; + struct tm *gmTimer; + char * _test = NULL; + uint32_t _ret = 0; + + ftime (&timer); + gmTimer = gmtime( &timer.time ); + _test = (char *) calloc( sizeof( char ), timer.millitm ); + _ret = (uint32_t)(gmTimer->tm_year + 1900 + gmTimer->tm_mon + 1 + gmTimer->tm_mday + + gmTimer->tm_hour + gmTimer->tm_min + gmTimer->tm_sec + timer.millitm + + (uintptr_t) ( _test + timer.millitm ) + getpid()); + + if( _test ) + free( _test ); + + return _ret; +} +#endif // OAES_HAVE_ISAAC + +static OAES_RET oaes_key_destroy( oaes_key ** key ) +{ + if( NULL == *key ) + return OAES_RET_SUCCESS; + + if( (*key)->data ) + { + free( (*key)->data ); + (*key)->data = NULL; + } + + if( (*key)->exp_data ) + { + free( (*key)->exp_data ); + (*key)->exp_data = NULL; + } + + (*key)->data_len = 0; + (*key)->exp_data_len = 0; + (*key)->num_keys = 0; + (*key)->key_base = 0; + free( *key ); + *key = NULL; + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_key_expand( OAES_CTX * ctx ) +{ + size_t _i, _j; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + _ctx->key->key_base = _ctx->key->data_len / OAES_RKEY_LEN; + _ctx->key->num_keys = _ctx->key->key_base + OAES_ROUND_BASE; + + _ctx->key->exp_data_len = _ctx->key->num_keys * OAES_RKEY_LEN * OAES_COL_LEN; + _ctx->key->exp_data = (uint8_t *) + calloc( _ctx->key->exp_data_len, sizeof( uint8_t )); + + if( NULL == _ctx->key->exp_data ) + return OAES_RET_MEM; + + // the first _ctx->key->data_len are a direct copy + memcpy( _ctx->key->exp_data, _ctx->key->data, _ctx->key->data_len ); + + // apply ExpandKey algorithm for remainder + for( _i = _ctx->key->key_base; _i < _ctx->key->num_keys * OAES_RKEY_LEN; _i++ ) + { + uint8_t _temp[OAES_COL_LEN]; + + memcpy( _temp, + _ctx->key->exp_data + ( _i - 1 ) * OAES_RKEY_LEN, OAES_COL_LEN ); + + // transform key column + if( 0 == _i % _ctx->key->key_base ) + { + oaes_word_rot_left( _temp ); + + for( _j = 0; _j < OAES_COL_LEN; _j++ ) + oaes_sub_byte( _temp + _j ); + + _temp[0] = _temp[0] ^ oaes_gf_8[ _i / _ctx->key->key_base - 1 ]; + } + else if( _ctx->key->key_base > 6 && 4 == _i % _ctx->key->key_base ) + { + for( _j = 0; _j < OAES_COL_LEN; _j++ ) + oaes_sub_byte( _temp + _j ); + } + + for( _j = 0; _j < OAES_COL_LEN; _j++ ) + { + _ctx->key->exp_data[ _i * OAES_RKEY_LEN + _j ] = + _ctx->key->exp_data[ ( _i - _ctx->key->key_base ) * + OAES_RKEY_LEN + _j ] ^ _temp[_j]; + } + } + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_key_gen( OAES_CTX * ctx, size_t key_size ) +{ + size_t _i; + oaes_key * _key = NULL; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + OAES_RET _rc = OAES_RET_SUCCESS; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + _key = (oaes_key *) calloc( sizeof( oaes_key ), 1 ); + + if( NULL == _key ) + return OAES_RET_MEM; + + if( _ctx->key ) + oaes_key_destroy( &(_ctx->key) ); + + _key->data_len = key_size; + _key->data = (uint8_t *) calloc( key_size, sizeof( uint8_t )); + + if( NULL == _key->data ) + return OAES_RET_MEM; + + for( _i = 0; _i < key_size; _i++ ) +#ifdef OAES_HAVE_ISAAC + _key->data[_i] = (uint8_t) rand( _ctx->rctx ); +#else + _key->data[_i] = (uint8_t) rand(); +#endif // OAES_HAVE_ISAAC + + _ctx->key = _key; + _rc = _rc || oaes_key_expand( ctx ); + + if( _rc != OAES_RET_SUCCESS ) + { + oaes_key_destroy( &(_ctx->key) ); + return _rc; + } + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_key_gen_128( OAES_CTX * ctx ) +{ + return oaes_key_gen( ctx, 16 ); +} + +OAES_RET oaes_key_gen_192( OAES_CTX * ctx ) +{ + return oaes_key_gen( ctx, 24 ); +} + +OAES_RET oaes_key_gen_256( OAES_CTX * ctx ) +{ + return oaes_key_gen( ctx, 32 ); +} + +OAES_RET oaes_key_export( OAES_CTX * ctx, + uint8_t * data, size_t * data_len ) +{ + size_t _data_len_in; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + if( NULL == data_len ) + return OAES_RET_ARG3; + + _data_len_in = *data_len; + // data + header + *data_len = _ctx->key->data_len + OAES_BLOCK_SIZE; + + if( NULL == data ) + return OAES_RET_SUCCESS; + + if( _data_len_in < *data_len ) + return OAES_RET_BUF; + + // header + memcpy( data, oaes_header, OAES_BLOCK_SIZE ); + data[5] = 0x01; + data[7] = (uint8_t)_ctx->key->data_len; + memcpy( data + OAES_BLOCK_SIZE, _ctx->key->data, _ctx->key->data_len ); + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_key_export_data( OAES_CTX * ctx, + uint8_t * data, size_t * data_len ) +{ + size_t _data_len_in; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + if( NULL == data_len ) + return OAES_RET_ARG3; + + _data_len_in = *data_len; + *data_len = _ctx->key->data_len; + + if( NULL == data ) + return OAES_RET_SUCCESS; + + if( _data_len_in < *data_len ) + return OAES_RET_BUF; + + memcpy( data, _ctx->key->data, *data_len ); + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_key_import( OAES_CTX * ctx, + const uint8_t * data, size_t data_len ) +{ + oaes_ctx * _ctx = (oaes_ctx *) ctx; + OAES_RET _rc = OAES_RET_SUCCESS; + int _key_length; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == data ) + return OAES_RET_ARG2; + + switch( data_len ) + { + case 16 + OAES_BLOCK_SIZE: + case 24 + OAES_BLOCK_SIZE: + case 32 + OAES_BLOCK_SIZE: + break; + default: + return OAES_RET_ARG3; + } + + // header + if( 0 != memcmp( data, oaes_header, 4 ) ) + return OAES_RET_HEADER; + + // header version + switch( data[4] ) + { + case 0x01: + break; + default: + return OAES_RET_HEADER; + } + + // header type + switch( data[5] ) + { + case 0x01: + break; + default: + return OAES_RET_HEADER; + } + + // options + _key_length = data[7]; + switch( _key_length ) + { + case 16: + case 24: + case 32: + break; + default: + return OAES_RET_HEADER; + } + + if( (int)data_len != _key_length + OAES_BLOCK_SIZE ) + return OAES_RET_ARG3; + + if( _ctx->key ) + oaes_key_destroy( &(_ctx->key) ); + + _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 ); + + if( NULL == _ctx->key ) + return OAES_RET_MEM; + + _ctx->key->data_len = _key_length; + _ctx->key->data = (uint8_t *) + calloc( _key_length, sizeof( uint8_t )); + + if( NULL == _ctx->key->data ) + { + oaes_key_destroy( &(_ctx->key) ); + return OAES_RET_MEM; + } + + memcpy( _ctx->key->data, data + OAES_BLOCK_SIZE, _key_length ); + _rc = _rc || oaes_key_expand( ctx ); + + if( _rc != OAES_RET_SUCCESS ) + { + oaes_key_destroy( &(_ctx->key) ); + return _rc; + } + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_key_import_data( OAES_CTX * ctx, + const uint8_t * data, size_t data_len ) +{ + oaes_ctx * _ctx = (oaes_ctx *) ctx; + OAES_RET _rc = OAES_RET_SUCCESS; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == data ) + return OAES_RET_ARG2; + + switch( data_len ) + { + case 16: + case 24: + case 32: + break; + default: + return OAES_RET_ARG3; + } + + if( _ctx->key ) + oaes_key_destroy( &(_ctx->key) ); + + _ctx->key = (oaes_key *) calloc( sizeof( oaes_key ), 1 ); + + if( NULL == _ctx->key ) + return OAES_RET_MEM; + + _ctx->key->data_len = data_len; + _ctx->key->data = (uint8_t *) + calloc( data_len, sizeof( uint8_t )); + + if( NULL == _ctx->key->data ) + { + oaes_key_destroy( &(_ctx->key) ); + return OAES_RET_MEM; + } + + memcpy( _ctx->key->data, data, data_len ); + _rc = _rc || oaes_key_expand( ctx ); + + if( _rc != OAES_RET_SUCCESS ) + { + oaes_key_destroy( &(_ctx->key) ); + return _rc; + } + + return OAES_RET_SUCCESS; +} + +OAES_CTX * oaes_alloc(void) +{ + oaes_ctx * _ctx = (oaes_ctx *) calloc( sizeof( oaes_ctx ), 1 ); + + if( NULL == _ctx ) + return NULL; + +#ifdef OAES_HAVE_ISAAC + { + ub4 _i = 0; + char _seed[RANDSIZ + 1]; + + _ctx->rctx = (randctx *) calloc( sizeof( randctx ), 1 ); + + if( NULL == _ctx->rctx ) + { + free( _ctx ); + return NULL; + } + + oaes_get_seed( _seed ); + memset( _ctx->rctx->randrsl, 0, RANDSIZ ); + memcpy( _ctx->rctx->randrsl, _seed, RANDSIZ ); + randinit( _ctx->rctx, TRUE); + } +#else + srand( oaes_get_seed() ); +#endif // OAES_HAVE_ISAAC + + _ctx->key = NULL; + oaes_set_option( _ctx, OAES_OPTION_CBC, NULL ); + +#ifdef OAES_DEBUG + _ctx->step_cb = NULL; + oaes_set_option( _ctx, OAES_OPTION_STEP_OFF, NULL ); +#endif // OAES_DEBUG + + return (OAES_CTX *) _ctx; +} + +OAES_RET oaes_free( OAES_CTX ** ctx ) +{ + oaes_ctx ** _ctx = (oaes_ctx **) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == *_ctx ) + return OAES_RET_SUCCESS; + + if( (*_ctx)->key ) + oaes_key_destroy( &((*_ctx)->key) ); + +#ifdef OAES_HAVE_ISAAC + if( (*_ctx)->rctx ) + { + free( (*_ctx)->rctx ); + (*_ctx)->rctx = NULL; + } +#endif // OAES_HAVE_ISAAC + + free( *_ctx ); + *_ctx = NULL; + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_set_option( OAES_CTX * ctx, + OAES_OPTION option, const void * value ) +{ + size_t _i; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + switch( option ) + { + case OAES_OPTION_ECB: + _ctx->options &= ~OAES_OPTION_CBC; + memset( _ctx->iv, 0, OAES_BLOCK_SIZE ); + break; + + case OAES_OPTION_CBC: + _ctx->options &= ~OAES_OPTION_ECB; + if( value ) + memcpy( _ctx->iv, value, OAES_BLOCK_SIZE ); + else + { + for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ ) +#ifdef OAES_HAVE_ISAAC + _ctx->iv[_i] = (uint8_t) rand( _ctx->rctx ); +#else + _ctx->iv[_i] = (uint8_t) rand(); +#endif // OAES_HAVE_ISAAC + } + break; + +#ifdef OAES_DEBUG + + case OAES_OPTION_STEP_ON: + if( value ) + { + _ctx->options &= ~OAES_OPTION_STEP_OFF; + _ctx->step_cb = value; + } + else + { + _ctx->options &= ~OAES_OPTION_STEP_ON; + _ctx->options |= OAES_OPTION_STEP_OFF; + _ctx->step_cb = NULL; + return OAES_RET_ARG3; + } + break; + + case OAES_OPTION_STEP_OFF: + _ctx->options &= ~OAES_OPTION_STEP_ON; + _ctx->step_cb = NULL; + break; + +#endif // OAES_DEBUG + + default: + return OAES_RET_ARG2; + } + + _ctx->options |= option; + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_encrypt_block( + OAES_CTX * ctx, uint8_t * c, size_t c_len ) +{ + size_t _i, _j; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == c ) + return OAES_RET_ARG2; + + if( c_len != OAES_BLOCK_SIZE ) + return OAES_RET_ARG3; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "input", 1, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(State, K0) + for( _i = 0; _i < c_len; _i++ ) + c[_i] = c[_i] ^ _ctx->key->exp_data[_i]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data, "k_sch", 1, NULL ); + _ctx->step_cb( c, "k_add", 1, NULL ); + } +#endif // OAES_DEBUG + + // for round = 1 step 1 to Nr–1 + for( _i = 1; _i < _ctx->key->num_keys - 1; _i++ ) + { + // SubBytes(state) + for( _j = 0; _j < c_len; _j++ ) + oaes_sub_byte( c + _j ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "s_box", _i, NULL ); +#endif // OAES_DEBUG + + // ShiftRows(state) + oaes_shift_rows( c ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "s_row", _i, NULL ); +#endif // OAES_DEBUG + + // MixColumns(state) + oaes_mix_cols( c ); + oaes_mix_cols( c + 4 ); + oaes_mix_cols( c + 8 ); + oaes_mix_cols( c + 12 ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "m_col", _i, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1]) + for( _j = 0; _j < c_len; _j++ ) + c[_j] = c[_j] ^ + _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN, + "k_sch", _i, NULL ); + _ctx->step_cb( c, "k_add", _i, NULL ); + } +#endif // OAES_DEBUG + + } + + // SubBytes(state) + for( _i = 0; _i < c_len; _i++ ) + oaes_sub_byte( c + _i ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "s_box", _ctx->key->num_keys - 1, NULL ); +#endif // OAES_DEBUG + + // ShiftRows(state) + oaes_shift_rows( c ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "s_row", _ctx->key->num_keys - 1, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1]) + for( _i = 0; _i < c_len; _i++ ) + c[_i] = c[_i] ^ _ctx->key->exp_data[ + ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data + + ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN, + "k_sch", _ctx->key->num_keys - 1, NULL ); + _ctx->step_cb( c, "output", _ctx->key->num_keys - 1, NULL ); + } +#endif // OAES_DEBUG + + return OAES_RET_SUCCESS; +} + +static OAES_RET oaes_decrypt_block( + OAES_CTX * ctx, uint8_t * c, size_t c_len ) +{ + size_t _i, _j; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == c ) + return OAES_RET_ARG2; + + if( c_len != OAES_BLOCK_SIZE ) + return OAES_RET_ARG3; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "iinput", _ctx->key->num_keys - 1, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(state, w[Nr*Nb, (Nr+1)*Nb-1]) + for( _i = 0; _i < c_len; _i++ ) + c[_i] = c[_i] ^ _ctx->key->exp_data[ + ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN + _i ]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data + + ( _ctx->key->num_keys - 1 ) * OAES_RKEY_LEN * OAES_COL_LEN, + "ik_sch", _ctx->key->num_keys - 1, NULL ); + _ctx->step_cb( c, "ik_add", _ctx->key->num_keys - 1, NULL ); + } +#endif // OAES_DEBUG + + for( _i = _ctx->key->num_keys - 2; _i > 0; _i-- ) + { + // InvShiftRows(state) + oaes_inv_shift_rows( c ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "is_row", _i, NULL ); +#endif // OAES_DEBUG + + // InvSubBytes(state) + for( _j = 0; _j < c_len; _j++ ) + oaes_inv_sub_byte( c + _j ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "is_box", _i, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(state, w[round*Nb, (round+1)*Nb-1]) + for( _j = 0; _j < c_len; _j++ ) + c[_j] = c[_j] ^ + _ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN + _j]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data + _i * OAES_RKEY_LEN * OAES_COL_LEN, + "ik_sch", _i, NULL ); + _ctx->step_cb( c, "ik_add", _i, NULL ); + } +#endif // OAES_DEBUG + + // InvMixColums(state) + oaes_inv_mix_cols( c ); + oaes_inv_mix_cols( c + 4 ); + oaes_inv_mix_cols( c + 8 ); + oaes_inv_mix_cols( c + 12 ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "im_col", _i, NULL ); +#endif // OAES_DEBUG + + } + + // InvShiftRows(state) + oaes_inv_shift_rows( c ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "is_row", 1, NULL ); +#endif // OAES_DEBUG + + // InvSubBytes(state) + for( _i = 0; _i < c_len; _i++ ) + oaes_inv_sub_byte( c + _i ); + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + _ctx->step_cb( c, "is_box", 1, NULL ); +#endif // OAES_DEBUG + + // AddRoundKey(state, w[0, Nb-1]) + for( _i = 0; _i < c_len; _i++ ) + c[_i] = c[_i] ^ _ctx->key->exp_data[_i]; + +#ifdef OAES_DEBUG + if( _ctx->step_cb ) + { + _ctx->step_cb( _ctx->key->exp_data, "ik_sch", 1, NULL ); + _ctx->step_cb( c, "ioutput", 1, NULL ); + } +#endif // OAES_DEBUG + + return OAES_RET_SUCCESS; +} + +OAES_RET oaes_encrypt( OAES_CTX * ctx, + const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len ) +{ + size_t _i, _j, _c_len_in, _c_data_len; + size_t _pad_len = m_len % OAES_BLOCK_SIZE == 0 ? + 0 : OAES_BLOCK_SIZE - m_len % OAES_BLOCK_SIZE; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + OAES_RET _rc = OAES_RET_SUCCESS; + uint8_t _flags = _pad_len ? OAES_FLAG_PAD : 0; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == m ) + return OAES_RET_ARG2; + + if( NULL == c_len ) + return OAES_RET_ARG5; + + _c_len_in = *c_len; + // data + pad + _c_data_len = m_len + _pad_len; + // header + iv + data + pad + *c_len = 2 * OAES_BLOCK_SIZE + m_len + _pad_len; + + if( NULL == c ) + return OAES_RET_SUCCESS; + + if( _c_len_in < *c_len ) + return OAES_RET_BUF; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + // header + memcpy(c, oaes_header, OAES_BLOCK_SIZE ); + memcpy(c + 6, &_ctx->options, sizeof(_ctx->options)); + memcpy(c + 8, &_flags, sizeof(_flags)); + // iv + memcpy(c + OAES_BLOCK_SIZE, _ctx->iv, OAES_BLOCK_SIZE ); + // data + memcpy(c + 2 * OAES_BLOCK_SIZE, m, m_len ); + + for( _i = 0; _i < _c_data_len; _i += OAES_BLOCK_SIZE ) + { + uint8_t _block[OAES_BLOCK_SIZE]; + size_t _block_size = min( m_len - _i, OAES_BLOCK_SIZE ); + + memcpy( _block, c + 2 * OAES_BLOCK_SIZE + _i, _block_size ); + + // insert pad + for( _j = 0; _j < OAES_BLOCK_SIZE - _block_size; _j++ ) + _block[ _block_size + _j ] = (uint8_t)(_j + 1); + + // CBC + if( _ctx->options & OAES_OPTION_CBC ) + { + for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ ) + _block[_j] = _block[_j] ^ _ctx->iv[_j]; + } + + _rc = _rc || + oaes_encrypt_block( ctx, _block, OAES_BLOCK_SIZE ); + memcpy( c + 2 * OAES_BLOCK_SIZE + _i, _block, OAES_BLOCK_SIZE ); + + if( _ctx->options & OAES_OPTION_CBC ) + memcpy( _ctx->iv, _block, OAES_BLOCK_SIZE ); + } + + return _rc; +} + +OAES_RET oaes_decrypt( OAES_CTX * ctx, + const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len ) +{ + size_t _i, _j, _m_len_in; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + OAES_RET _rc = OAES_RET_SUCCESS; + uint8_t _iv[OAES_BLOCK_SIZE]; + uint8_t _flags; + OAES_OPTION _options; + + if( NULL == ctx ) + return OAES_RET_ARG1; + + if( NULL == c ) + return OAES_RET_ARG2; + + if( c_len % OAES_BLOCK_SIZE ) + return OAES_RET_ARG3; + + if( NULL == m_len ) + return OAES_RET_ARG5; + + _m_len_in = *m_len; + *m_len = c_len - 2 * OAES_BLOCK_SIZE; + + if( NULL == m ) + return OAES_RET_SUCCESS; + + if( _m_len_in < *m_len ) + return OAES_RET_BUF; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + // header + if( 0 != memcmp( c, oaes_header, 4 ) ) + return OAES_RET_HEADER; + + // header version + switch( c[4] ) + { + case 0x01: + break; + default: + return OAES_RET_HEADER; + } + + // header type + switch( c[5] ) + { + case 0x02: + break; + default: + return OAES_RET_HEADER; + } + + // options + memcpy(&_options, c + 6, sizeof(_options)); + // validate that all options are valid + if( _options & ~( + OAES_OPTION_ECB + | OAES_OPTION_CBC +#ifdef OAES_DEBUG + | OAES_OPTION_STEP_ON + | OAES_OPTION_STEP_OFF +#endif // OAES_DEBUG + ) ) + return OAES_RET_HEADER; + if( ( _options & OAES_OPTION_ECB ) && + ( _options & OAES_OPTION_CBC ) ) + return OAES_RET_HEADER; + if( _options == OAES_OPTION_NONE ) + return OAES_RET_HEADER; + + // flags + memcpy(&_flags, c + 8, sizeof(_flags)); + // validate that all flags are valid + if( _flags & ~( + OAES_FLAG_PAD + ) ) + return OAES_RET_HEADER; + + // iv + memcpy( _iv, c + OAES_BLOCK_SIZE, OAES_BLOCK_SIZE); + // data + pad + memcpy( m, c + 2 * OAES_BLOCK_SIZE, *m_len ); + + for( _i = 0; _i < *m_len; _i += OAES_BLOCK_SIZE ) + { + if( ( _options & OAES_OPTION_CBC ) && _i > 0 ) + memcpy( _iv, c + OAES_BLOCK_SIZE + _i, OAES_BLOCK_SIZE ); + + _rc = _rc || + oaes_decrypt_block( ctx, m + _i, min( *m_len - _i, OAES_BLOCK_SIZE ) ); + + // CBC + if( _options & OAES_OPTION_CBC ) + { + for( _j = 0; _j < OAES_BLOCK_SIZE; _j++ ) + m[ _i + _j ] = m[ _i + _j ] ^ _iv[_j]; + } + } + + // remove pad + if( _flags & OAES_FLAG_PAD ) + { + int _is_pad = 1; + size_t _temp = (size_t) m[*m_len - 1]; + + if( _temp <= 0x00 || _temp > 0x0f ) + return OAES_RET_HEADER; + for( _i = 0; _i < _temp; _i++ ) + if( m[*m_len - 1 - _i] != _temp - _i ) + _is_pad = 0; + if( _is_pad ) + { + memset( m + *m_len - _temp, 0, _temp ); + *m_len -= _temp; + } + else + return OAES_RET_HEADER; + } + + return OAES_RET_SUCCESS; +} + + +OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c ) +{ + size_t _i; + + if( NULL == key ) + return OAES_RET_ARG1; + + if( NULL == c ) + return OAES_RET_ARG2; + + // SubBytes(state) + for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ ) + oaes_sub_byte( c + _i ); + + // ShiftRows(state) + oaes_shift_rows( c ); + + // MixColumns(state) + oaes_mix_cols( c ); + oaes_mix_cols( c + 4 ); + oaes_mix_cols( c + 8 ); + oaes_mix_cols( c + 12 ); + + // AddRoundKey(State, key) + for( _i = 0; _i < OAES_BLOCK_SIZE; _i++ ) + c[_i] ^= key[_i]; + + return OAES_RET_SUCCESS; +} + +OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c ) +{ + size_t _i; + oaes_ctx * _ctx = (oaes_ctx *) ctx; + + if( NULL == _ctx ) + return OAES_RET_ARG1; + + if( NULL == c ) + return OAES_RET_ARG2; + + if( NULL == _ctx->key ) + return OAES_RET_NOKEY; + + for ( _i = 0; _i < 10; ++_i ) + { + oaes_encryption_round( &_ctx->key->exp_data[_i * OAES_RKEY_LEN * OAES_COL_LEN], c ); + } + + return OAES_RET_SUCCESS; +} diff --git a/src/crypto/flex/cryptonote/crypto/oaes_lib.h b/src/crypto/flex/cryptonote/crypto/oaes_lib.h new file mode 100644 index 0000000000..fd1942822a --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/oaes_lib.h @@ -0,0 +1,215 @@ +/* + * --------------------------------------------------------------------------- + * OpenAES License + * --------------------------------------------------------------------------- + * Copyright (c) 2012, Nabil S. Al Ramli, www.nalramli.com + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * --------------------------------------------------------------------------- + */ + +#ifndef _OAES_LIB_H +#define _OAES_LIB_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 +# ifdef OAES_SHARED +# ifdef oaes_lib_EXPORTS +# define OAES_API __declspec(dllexport) +# else +# define OAES_API __declspec(dllimport) +# endif +# else +# define OAES_API +# endif +#else +# define OAES_API +#endif // WIN32 + +#define OAES_VERSION "0.8.1" +#define OAES_BLOCK_SIZE 16 + +typedef void OAES_CTX; + +typedef enum +{ + OAES_RET_FIRST = 0, + OAES_RET_SUCCESS = 0, + OAES_RET_UNKNOWN, + OAES_RET_ARG1, + OAES_RET_ARG2, + OAES_RET_ARG3, + OAES_RET_ARG4, + OAES_RET_ARG5, + OAES_RET_NOKEY, + OAES_RET_MEM, + OAES_RET_BUF, + OAES_RET_HEADER, + OAES_RET_COUNT +} OAES_RET; + +/* + * oaes_set_option() takes one of these values for its [option] parameter + * some options accept either an optional or a required [value] parameter + */ +// no option +#define OAES_OPTION_NONE 0 +// enable ECB mode, disable CBC mode +#define OAES_OPTION_ECB 1 +// enable CBC mode, disable ECB mode +// value is optional, may pass uint8_t iv[OAES_BLOCK_SIZE] to specify +// the value of the initialization vector, iv +#define OAES_OPTION_CBC 2 + +#ifdef OAES_DEBUG +typedef int ( * oaes_step_cb ) ( + const uint8_t state[OAES_BLOCK_SIZE], + const char * step_name, + int step_count, + void * user_data ); +// enable state stepping mode +// value is required, must pass oaes_step_cb to receive the state at each step +#define OAES_OPTION_STEP_ON 4 +// disable state stepping mode +#define OAES_OPTION_STEP_OFF 8 +#endif // OAES_DEBUG + +typedef uint16_t OAES_OPTION; + +typedef struct _oaes_key +{ + size_t data_len; + uint8_t *data; + size_t exp_data_len; + uint8_t *exp_data; + size_t num_keys; + size_t key_base; +} oaes_key; + +typedef struct _oaes_ctx +{ +#ifdef OAES_HAVE_ISAAC + randctx * rctx; +#endif // OAES_HAVE_ISAAC + +#ifdef OAES_DEBUG + oaes_step_cb step_cb; +#endif // OAES_DEBUG + + oaes_key * key; + OAES_OPTION options; + uint8_t iv[OAES_BLOCK_SIZE]; +} oaes_ctx; +/* + * // usage: + * + * OAES_CTX * ctx = oaes_alloc(); + * . + * . + * . + * { + * oaes_gen_key_xxx( ctx ); + * { + * oaes_key_export( ctx, _buf, &_buf_len ); + * // or + * oaes_key_export_data( ctx, _buf, &_buf_len );\ + * } + * } + * // or + * { + * oaes_key_import( ctx, _buf, _buf_len ); + * // or + * oaes_key_import_data( ctx, _buf, _buf_len ); + * } + * . + * . + * . + * oaes_encrypt( ctx, m, m_len, c, &c_len ); + * . + * . + * . + * oaes_decrypt( ctx, c, c_len, m, &m_len ); + * . + * . + * . + * oaes_free( &ctx ); + */ + +OAES_API OAES_CTX * oaes_alloc(void); + +OAES_API OAES_RET oaes_free( OAES_CTX ** ctx ); + +OAES_API OAES_RET oaes_set_option( OAES_CTX * ctx, + OAES_OPTION option, const void * value ); + +OAES_API OAES_RET oaes_key_gen_128( OAES_CTX * ctx ); + +OAES_API OAES_RET oaes_key_gen_192( OAES_CTX * ctx ); + +OAES_API OAES_RET oaes_key_gen_256( OAES_CTX * ctx ); + +// export key with header information +// set data == NULL to get the required data_len +OAES_API OAES_RET oaes_key_export( OAES_CTX * ctx, + uint8_t * data, size_t * data_len ); + +// directly export the data from key +// set data == NULL to get the required data_len +OAES_API OAES_RET oaes_key_export_data( OAES_CTX * ctx, + uint8_t * data, size_t * data_len ); + +// import key with header information +OAES_API OAES_RET oaes_key_import( OAES_CTX * ctx, + const uint8_t * data, size_t data_len ); + +// directly import data into key +OAES_API OAES_RET oaes_key_import_data( OAES_CTX * ctx, + const uint8_t * data, size_t data_len ); + +// set c == NULL to get the required c_len +OAES_API OAES_RET oaes_encrypt( OAES_CTX * ctx, + const uint8_t * m, size_t m_len, uint8_t * c, size_t * c_len ); + +// set m == NULL to get the required m_len +OAES_API OAES_RET oaes_decrypt( OAES_CTX * ctx, + const uint8_t * c, size_t c_len, uint8_t * m, size_t * m_len ); + +// set buf == NULL to get the required buf_len +OAES_API OAES_RET oaes_sprintf( + char * buf, size_t * buf_len, const uint8_t * data, size_t data_len ); + +OAES_API OAES_RET oaes_encryption_round( const uint8_t * key, uint8_t * c ); + +OAES_API OAES_RET oaes_pseudo_encrypt_ecb( OAES_CTX * ctx, uint8_t * c ); + +#ifdef __cplusplus +} +#endif + +#endif // _OAES_LIB_H diff --git a/src/crypto/flex/cryptonote/crypto/variant2_int_sqrt.h b/src/crypto/flex/cryptonote/crypto/variant2_int_sqrt.h new file mode 100644 index 0000000000..ba5b431aec --- /dev/null +++ b/src/crypto/flex/cryptonote/crypto/variant2_int_sqrt.h @@ -0,0 +1,168 @@ +#ifndef VARIANT2_INT_SQRT_H +#define VARIANT2_INT_SQRT_H + +#include +#include + +#define VARIANT2_INTEGER_MATH_SQRT_STEP_SSE2() \ + do { \ + const __m128i exp_double_bias = _mm_set_epi64x(0, 1023ULL << 52); \ + __m128d x = _mm_castsi128_pd(_mm_add_epi64(_mm_cvtsi64_si128(sqrt_input >> 12), exp_double_bias)); \ + x = _mm_sqrt_sd(_mm_setzero_pd(), x); \ + sqrt_result = (uint64_t)(_mm_cvtsi128_si64(_mm_sub_epi64(_mm_castpd_si128(x), exp_double_bias))) >> 19; \ + } while(0) + +#define VARIANT2_INTEGER_MATH_SQRT_STEP_FP64() \ + do { \ + sqrt_result = sqrt(sqrt_input + 18446744073709551616.0) * 2.0 - 8589934592.0; \ + } while(0) + +//#define VARIANT2_INTEGER_MATH_SQRT_STEP_REF() \ +// sqrt_result = integer_square_root_v2(sqrt_input) + +// Reference implementation of the integer square root for Cryptonight variant 2 +// Computes integer part of "sqrt(2^64 + n) * 2 - 2^33" +// +// In other words, given 64-bit unsigned integer n: +// 1) Write it as x = 1.NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN000... in binary (1 <= x < 2, all 64 bits of n are used) +// 2) Calculate sqrt(x) = 1.0RRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR... (1 <= sqrt(x) < sqrt(2), so it will always start with "1.0" in binary) +// 3) Take 32 bits that come after "1.0" and return them as a 32-bit unsigned integer, discard all remaining bits +// +// Some sample inputs and outputs: +// +// Input | Output | Exact value of "sqrt(2^64 + n) * 2 - 2^33" +// -----------------|------------|------------------------------------------- +// 0 | 0 | 0 +// 2^32 | 0 | 0.99999999994179233909330885695244... +// 2^32 + 1 | 1 | 1.0000000001746229827200734316305... +// 2^50 | 262140 | 262140.00012206565608606978175873... +// 2^55 + 20963331 | 8384515 | 8384515.9999999997673963974959744... +// 2^55 + 20963332 | 8384516 | 8384516 +// 2^62 + 26599786 | 1013904242 | 1013904242.9999999999479374853545... +// 2^62 + 26599787 | 1013904243 | 1013904243.0000000001561875439364... +// 2^64 - 1 | 3558067407 | 3558067407.9041987696409179931096... + +// The reference implementation as it is now uses only unsigned int64 arithmetic, so it can't have undefined behavior +// It was tested once for all edge cases and confirmed correct +// +// !!! Note: if you're modifying this code, uncomment the test in monero/tests/hash/main.cpp !!! +// +/* +static inline uint64_t integer_square_root_v2(uint64_t n) +{ + uint64_t r = 1ULL << 63; + + for (uint64_t bit = 1ULL << 60; bit; bit >>= 2) + { + const bool b = (n < r + bit); + const uint64_t n_next = n - (r + bit); + const uint64_t r_next = r + bit * 2; + n = b ? n : n_next; + r = b ? r : r_next; + r >>= 1; + } + + return r * 2 + ((n > r) ? 1 : 0); +} +*/ + +/* +VARIANT2_INTEGER_MATH_SQRT_FIXUP checks that "r" is an integer part of "sqrt(2^64 + sqrt_input) * 2 - 2^33" and adds or subtracts 1 if needed +It's hard to understand how it works, so here is a full calculation of formulas used in VARIANT2_INTEGER_MATH_SQRT_FIXUP + +The following inequalities must hold for r if it's an integer part of "sqrt(2^64 + sqrt_input) * 2 - 2^33": +1) r <= sqrt(2^64 + sqrt_input) * 2 - 2^33 +2) r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33 + +We need to check them using only unsigned integer arithmetic to avoid rounding errors and undefined behavior + +First inequality: r <= sqrt(2^64 + sqrt_input) * 2 - 2^33 +----------------------------------------------------------------------------------- +r <= sqrt(2^64 + sqrt_input) * 2 - 2^33 +r + 2^33 <= sqrt(2^64 + sqrt_input) * 2 +r/2 + 2^32 <= sqrt(2^64 + sqrt_input) +(r/2 + 2^32)^2 <= 2^64 + sqrt_input + +Rewrite r as r = s * 2 + b (s = trunc(r/2), b is 0 or 1) + +((s*2+b)/2 + 2^32)^2 <= 2^64 + sqrt_input +(s*2+b)^2/4 + 2*2^32*(s*2+b)/2 + 2^64 <= 2^64 + sqrt_input +(s*2+b)^2/4 + 2*2^32*(s*2+b)/2 <= sqrt_input +(s*2+b)^2/4 + 2^32*r <= sqrt_input +(s^2*4+2*s*2*b+b^2)/4 + 2^32*r <= sqrt_input +s^2+s*b+b^2/4 + 2^32*r <= sqrt_input +s*(s+b) + b^2/4 + 2^32*r <= sqrt_input + +Let r2 = s*(s+b) + r*2^32 +r2 + b^2/4 <= sqrt_input + +If this inequality doesn't hold, then we must decrement r: IF "r2 + b^2/4 > sqrt_input" THEN r = r - 1 + +b can be 0 or 1 +If b is 0 then we need to compare "r2 > sqrt_input" +If b is 1 then b^2/4 = 0.25, so we need to compare "r2 + 0.25 > sqrt_input" +Since both r2 and sqrt_input are integers, we can safely replace it with "r2 + 1 > sqrt_input" +----------------------------------------------------------------------------------- +Both cases can be merged to a single expression "r2 + b > sqrt_input" +----------------------------------------------------------------------------------- +There will be no overflow when calculating "r2 + b", so it's safe to compare with sqrt_input: +r2 + b = s*(s+b) + r*2^32 + b +The largest value s, b and r can have is s = 1779033703, b = 1, r = 3558067407 when sqrt_input = 2^64 - 1 +r2 + b <= 1779033703*1779033704 + 3558067407*2^32 + 1 = 18446744068217447385 < 2^64 + +Second inequality: r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33 +----------------------------------------------------------------------------------- +r + 1 > sqrt(2^64 + sqrt_input) * 2 - 2^33 +r + 1 + 2^33 > sqrt(2^64 + sqrt_input) * 2 +((r+1)/2 + 2^32)^2 > 2^64 + sqrt_input + +Rewrite r as r = s * 2 + b (s = trunc(r/2), b is 0 or 1) + +((s*2+b+1)/2 + 2^32)^2 > 2^64 + sqrt_input +(s*2+b+1)^2/4 + 2*(s*2+b+1)/2*2^32 + 2^64 > 2^64 + sqrt_input +(s*2+b+1)^2/4 + (s*2+b+1)*2^32 > sqrt_input +(s*2+b+1)^2/4 + (r+1)*2^32 > sqrt_input +(s*2+(b+1))^2/4 + r*2^32 + 2^32 > sqrt_input +(s^2*4+2*s*2*(b+1)+(b+1)^2)/4 + r*2^32 + 2^32 > sqrt_input +s^2+s*(b+1)+(b+1)^2/4 + r*2^32 + 2^32 > sqrt_input +s*(s+b) + s + (b+1)^2/4 + r*2^32 + 2^32 > sqrt_input + +Let r2 = s*(s+b) + r*2^32 + +r2 + s + (b+1)^2/4 + 2^32 > sqrt_input +r2 + 2^32 + (b+1)^2/4 > sqrt_input - s + +If this inequality doesn't hold, then we must decrement r: IF "r2 + 2^32 + (b+1)^2/4 <= sqrt_input - s" THEN r = r - 1 +b can be 0 or 1 +If b is 0 then we need to compare "r2 + 2^32 + 1/4 <= sqrt_input - s" which is equal to "r2 + 2^32 < sqrt_input - s" because all numbers here are integers +If b is 1 then (b+1)^2/4 = 1, so we need to compare "r2 + 2^32 + 1 <= sqrt_input - s" which is also equal to "r2 + 2^32 < sqrt_input - s" +----------------------------------------------------------------------------------- +Both cases can be merged to a single expression "r2 + 2^32 < sqrt_input - s" +----------------------------------------------------------------------------------- +There will be no overflow when calculating "r2 + 2^32": +r2 + 2^32 = s*(s+b) + r*2^32 + 2^32 = s*(s+b) + (r+1)*2^32 +The largest value s, b and r can have is s = 1779033703, b = 1, r = 3558067407 when sqrt_input = 2^64 - 1 +r2 + b <= 1779033703*1779033704 + 3558067408*2^32 = 18446744072512414680 < 2^64 + +There will be no integer overflow when calculating "sqrt_input - s", i.e. "sqrt_input >= s" at all times: +s = trunc(r/2) = trunc(sqrt(2^64 + sqrt_input) - 2^32) < sqrt(2^64 + sqrt_input) - 2^32 + 1 +sqrt_input > sqrt(2^64 + sqrt_input) - 2^32 + 1 +sqrt_input + 2^32 - 1 > sqrt(2^64 + sqrt_input) +(sqrt_input + 2^32 - 1)^2 > sqrt_input + 2^64 +sqrt_input^2 + 2*sqrt_input*(2^32 - 1) + (2^32-1)^2 > sqrt_input + 2^64 +sqrt_input^2 + sqrt_input*(2^33 - 2) + (2^32-1)^2 > sqrt_input + 2^64 +sqrt_input^2 + sqrt_input*(2^33 - 3) + (2^32-1)^2 > 2^64 +sqrt_input^2 + sqrt_input*(2^33 - 3) + 2^64-2^33+1 > 2^64 +sqrt_input^2 + sqrt_input*(2^33 - 3) - 2^33 + 1 > 0 +This inequality is true if sqrt_input > 1 and it's easy to check that s = 0 if sqrt_input is 0 or 1, so there will be no integer overflow +*/ + +#define VARIANT2_INTEGER_MATH_SQRT_FIXUP(r) \ + do { \ + const uint64_t s = r >> 1; \ + const uint64_t b = r & 1; \ + const uint64_t r2 = (uint64_t)(s) * (s + b) + (r << 32); \ + r += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 1 : 0); \ + } while(0) + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_dark.c b/src/crypto/flex/cryptonote/cryptonight_dark.c new file mode 100644 index 0000000000..32b019683c --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_dark.c @@ -0,0 +1,300 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The TurtleCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 524288 /* 512KB - 2^19 */ +#define ITER 262144 /* 2^18 */ +#define ITER_DIV 131072 /* 2^17 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_dark_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_dark_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_dark_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_dark_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_dark_blake_hash, do_dark_groestl_hash, do_dark_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightdark_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightdark_hash(const char* input, char* output, uint32_t len, int variant) { +#if defined(_MSC_VER) + struct cryptonightdark_ctx *ctx = _malloca(sizeof(struct cryptonightdark_ctx)); +#else + struct cryptonightdark_ctx *ctx = alloca(sizeof(struct cryptonightdark_ctx)); +#endif + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); +} + +void cryptonightdark_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_dark.h b/src/crypto/flex/cryptonote/cryptonight_dark.h new file mode 100644 index 0000000000..c636e336d1 --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_dark.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTDARK_H +#define CRYPTONIGHTDARK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightdark_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightdark_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_dark_lite.c b/src/crypto/flex/cryptonote/cryptonight_dark_lite.c new file mode 100644 index 0000000000..d2f482de1c --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_dark_lite.c @@ -0,0 +1,300 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The darkCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 524288 /* 512KB - 2^19 */ +#define ITER 262144 /* 2^18 */ +#define ITER_DIV 131072 /* 2^17 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) / 2 + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_dark_lite_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_dark_lite_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_dark_lite_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_dark_lite_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_dark_lite_blake_hash, do_dark_lite_groestl_hash, do_dark_lite_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightdarklite_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightdarklite_hash(const char* input, char* output, uint32_t len, int variant) { +#if defined(_MSC_VER) + struct cryptonightdarklite_ctx *ctx = _malloca(sizeof(struct cryptonightdarklite_ctx)); +#else + struct cryptonightdarklite_ctx *ctx = alloca(sizeof(struct cryptonightdarklite_ctx)); +#endif + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); +} + +void cryptonightdarklite_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_dark_lite.h b/src/crypto/flex/cryptonote/cryptonight_dark_lite.h new file mode 100644 index 0000000000..1dd6eddf60 --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_dark_lite.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTDARKLITE_H +#define CRYPTONIGHTDARKLITE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightdarklite_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightdarklite_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_fast.c b/src/crypto/flex/cryptonote/cryptonight_fast.c new file mode 100644 index 0000000000..46f26aff8e --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_fast.c @@ -0,0 +1,297 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The TurtleCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 2097152 /* 2 MiB 2^21 */ +#define ITER 524288 /* 2^19 */ +#define ITER_DIV 262144 /* 2^18 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_fast_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_fast_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_fast_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_fast_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_fast_blake_hash, do_fast_groestl_hash, do_fast_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightfast_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightfast_hash(const char* input, char* output, uint32_t len, int variant) { + struct cryptonightfast_ctx *ctx = malloc(sizeof(struct cryptonightfast_ctx)); + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); + free(ctx); +} + +void cryptonightfast_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_fast.h b/src/crypto/flex/cryptonote/cryptonight_fast.h new file mode 100644 index 0000000000..897b8ad855 --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_fast.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTFAST_H +#define CRYPTONIGHTFAST_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightfast_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightfast_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_lite.c b/src/crypto/flex/cryptonote/cryptonight_lite.c new file mode 100644 index 0000000000..d9be39cd9d --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_lite.c @@ -0,0 +1,300 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The TurtleCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 1048576 /* 1 MiB - 2^20 */ +#define ITER 524288 /* 2^19 */ +#define ITER_DIV 262144 /* 2^18 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_lite_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_lite_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_lite_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_lite_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_lite_blake_hash, do_lite_groestl_hash, do_lite_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightlite_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightlite_hash(const char* input, char* output, uint32_t len, int variant) { +#if defined(_MSC_VER) + struct cryptonightlite_ctx *ctx = _malloca(sizeof(struct cryptonightlite_ctx)); +#else + struct cryptonightlite_ctx *ctx = alloca(sizeof(struct cryptonightlite_ctx)); +#endif + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); +} + +void cryptonightlite_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_lite.h b/src/crypto/flex/cryptonote/cryptonight_lite.h new file mode 100644 index 0000000000..ebe1e45fab --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_lite.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTLITE_H +#define CRYPTONIGHTLITE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightlite_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightlite_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_turtle.c b/src/crypto/flex/cryptonote/cryptonight_turtle.c new file mode 100644 index 0000000000..5716307342 --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_turtle.c @@ -0,0 +1,300 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The TurtleCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 262144 /* 256KB - 2^18 */ +#define ITER 131072 /* 2^17 */ +#define ITER_DIV 65536 /* 2^16 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_turtle_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_turtle_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_turtle_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_turtle_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_turtle_blake_hash, do_turtle_groestl_hash, do_turtle_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightturtle_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightturtle_hash(const char* input, char* output, uint32_t len, int variant) { +#if defined(_MSC_VER) + struct cryptonightturtle_ctx *ctx = _malloca(sizeof(struct cryptonightturtle_ctx)); +#else + struct cryptonightturtle_ctx *ctx = alloca(sizeof(struct cryptonightturtle_ctx)); +#endif + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); +} + +void cryptonightturtle_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_turtle.h b/src/crypto/flex/cryptonote/cryptonight_turtle.h new file mode 100644 index 0000000000..e9334e4492 --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_turtle.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTTURTLE_H +#define CRYPTONIGHTTURTLE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightturtle_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightturtle_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/cryptonote/cryptonight_turtle_lite.c b/src/crypto/flex/cryptonote/cryptonight_turtle_lite.c new file mode 100644 index 0000000000..dc73458c0d --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_turtle_lite.c @@ -0,0 +1,300 @@ +// Copyright (c) 2012-2013 The Cryptonote developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. +// Portions Copyright (c) 2018 The Monero developers +// Portions Copyright (c) 2018 The TurtleCoin Developers + +#include +#include +#include "crypto/oaes_lib.h" +#include "crypto/c_keccak.h" +#include "crypto/c_groestl.h" +#include "crypto/c_blake256.h" +#include "crypto/c_jh.h" +#include "../../cn/c_skein.h" +#include "crypto/int-util.h" +#include "crypto/hash-ops.h" +#include "crypto/variant2_int_sqrt.h" + +#if defined(_MSC_VER) +#include +#endif + +#define MEMORY 262144 /* 256KB - 2^18 */ +#define ITER 131072 /* 2^17 */ +#define ITER_DIV 65536 /* 2^16 */ +#define AES_BLOCK_SIZE 16 +#define AES_KEY_SIZE 32 /*16*/ +#define INIT_SIZE_BLK 8 +#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) +#define CN_INIT (MEMORY / INIT_SIZE_BYTE) +#define CN_AES_INIT (MEMORY / AES_BLOCK_SIZE) / 2 + +#define VARIANT1_1(p) \ + do if (variant == 1) \ + { \ + const uint8_t tmp = ((const uint8_t*)(p))[11]; \ + static const uint32_t table = 0x75310; \ + const uint8_t index = (((tmp >> 3) & 6) | (tmp & 1)) << 1; \ + ((uint8_t*)(p))[11] = tmp ^ ((table >> index) & 0x30); \ + } while(0) + +#define VARIANT1_2(p) \ + do if (variant == 1) \ + { \ + ((uint64_t*)p)[1] ^= tweak1_2; \ + } while(0) + +#define VARIANT1_INIT() \ + if (variant == 1 && len < 43) \ + { \ + fprintf(stderr, "Cryptonight variant 1 needs at least 43 bytes of data"); \ + _exit(1); \ + } \ + const uint64_t tweak1_2 = (variant == 1) ? *(const uint64_t*)(((const uint8_t*)input)+35) ^ ctx->state.hs.w[24] : 0 + +#define U64(p) ((uint64_t*)(p)) + +#define VARIANT2_INIT(b, state) \ + uint64_t division_result; \ + uint64_t sqrt_result; \ + do if (variant >= 2) \ + { \ + U64(b)[2] = state.hs.w[8] ^ state.hs.w[10]; \ + U64(b)[3] = state.hs.w[9] ^ state.hs.w[11]; \ + division_result = state.hs.w[12]; \ + sqrt_result = state.hs.w[13]; \ + } while (0) + +#define VARIANT2_SHUFFLE_ADD(base_ptr, offset, a, b) \ + do if (variant >= 2) \ + { \ + uint64_t* chunk1 = U64((base_ptr) + ((offset) ^ 0x10)); \ + uint64_t* chunk2 = U64((base_ptr) + ((offset) ^ 0x20)); \ + uint64_t* chunk3 = U64((base_ptr) + ((offset) ^ 0x30)); \ + \ + const uint64_t chunk1_old[2] = { chunk1[0], chunk1[1] }; \ + \ + chunk1[0] = chunk3[0] + U64(b + 16)[0]; \ + chunk1[1] = chunk3[1] + U64(b + 16)[1]; \ + \ + chunk3[0] = chunk2[0] + U64(a)[0]; \ + chunk3[1] = chunk2[1] + U64(a)[1]; \ + \ + chunk2[0] = chunk1_old[0] + U64(b)[0]; \ + chunk2[1] = chunk1_old[1] + U64(b)[1]; \ + } while (0) + +#define VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr) \ + ((uint64_t*)(b))[0] ^= division_result ^ (sqrt_result << 32); \ + { \ + const uint64_t dividend = ((uint64_t*)(ptr))[1]; \ + const uint32_t divisor = (((uint32_t*)(ptr))[0] + (uint32_t)(sqrt_result << 1)) | 0x80000001UL; \ + division_result = ((uint32_t)(dividend / divisor)) + \ + (((uint64_t)(dividend % divisor)) << 32); \ + } \ + const uint64_t sqrt_input = ((uint64_t*)(ptr))[0] + division_result + +#define VARIANT2_INTEGER_MATH(b, ptr) \ + do if (variant >= 2) \ + { \ + VARIANT2_INTEGER_MATH_DIVISION_STEP(b, ptr); \ + VARIANT2_INTEGER_MATH_SQRT_STEP_FP64(); \ + VARIANT2_INTEGER_MATH_SQRT_FIXUP(sqrt_result); \ + } while (0) + +#define VARIANT2_2() \ + do if (variant >= 2) { \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[0] ^= hi; \ + ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x10)))[1] ^= lo; \ + hi ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[0]; \ + lo ^= ((uint64_t*)(ctx->long_state + ((j * AES_BLOCK_SIZE) ^ 0x20)))[1]; \ + } while (0) + +#pragma pack(push, 1) +union cn_slow_hash_state { + union hash_state hs; + struct { + uint8_t k[64]; + uint8_t init[INIT_SIZE_BYTE]; + }; +}; +#pragma pack(pop) + +static void do_turtle_lite_blake_hash(const void* input, size_t len, char* output) { + blake256_hash((uint8_t*)output, input, len); +} + +void do_turtle_lite_groestl_hash(const void* input, size_t len, char* output) { + groestl(input, len * 8, (uint8_t*)output); +} + +static void do_turtle_lite_jh_hash(const void* input, size_t len, char* output) { + int r = jh_hash(HASH_SIZE * 8, input, 8 * len, (uint8_t*)output); + assert(SUCCESS == r); +} + +static void do_turtle_lite_skein_hash(const void* input, size_t len, char* output) { + int r = skein_hash(8 * HASH_SIZE, input, 8 * len, (uint8_t*)output); + assert(SKEIN_SUCCESS == r); +} + +static void (* const extra_hashes[3])(const void *, size_t, char *) = { + do_turtle_lite_blake_hash, do_turtle_lite_groestl_hash, do_turtle_lite_skein_hash +}; + +extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey); +extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey); + +static inline size_t e2i(const uint8_t* a) { + return (*((uint64_t*) a) / AES_BLOCK_SIZE) & (CN_AES_INIT - 1); +} + +static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { + ((uint64_t*) res)[1] = mul128(((uint64_t*) a)[0], ((uint64_t*) b)[0], (uint64_t*) res); +} + +static void sum_half_blocks(uint8_t* a, const uint8_t* b) { + uint64_t a0, a1, b0, b1; + + a0 = SWAP64LE(((uint64_t*) a)[0]); + a1 = SWAP64LE(((uint64_t*) a)[1]); + b0 = SWAP64LE(((uint64_t*) b)[0]); + b1 = SWAP64LE(((uint64_t*) b)[1]); + a0 += b0; + a1 += b1; + ((uint64_t*) a)[0] = SWAP64LE(a0); + ((uint64_t*) a)[1] = SWAP64LE(a1); +} + +static inline void copy_block(uint8_t* dst, const uint8_t* src) { + ((uint64_t*) dst)[0] = ((uint64_t*) src)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) src)[1]; +} + +static void swap_blocks(uint8_t* a, uint8_t* b) { + size_t i; + uint8_t t; + for (i = 0; i < AES_BLOCK_SIZE; i++) { + t = a[i]; + a[i] = b[i]; + b[i] = t; + } +} + +static inline void xor_blocks(uint8_t* a, const uint8_t* b) { + ((uint64_t*) a)[0] ^= ((uint64_t*) b)[0]; + ((uint64_t*) a)[1] ^= ((uint64_t*) b)[1]; +} + +static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* dst) { + ((uint64_t*) dst)[0] = ((uint64_t*) a)[0] ^ ((uint64_t*) b)[0]; + ((uint64_t*) dst)[1] = ((uint64_t*) a)[1] ^ ((uint64_t*) b)[1]; +} + +struct cryptonightturtlelite_ctx { + uint8_t long_state[MEMORY]; + union cn_slow_hash_state state; + uint8_t text[INIT_SIZE_BYTE]; + uint8_t a[AES_BLOCK_SIZE]; + uint8_t b[AES_BLOCK_SIZE * 2]; + uint8_t c[AES_BLOCK_SIZE]; + uint8_t aes_key[AES_KEY_SIZE]; + oaes_ctx* aes_ctx; +}; + +void cryptonightturtlelite_hash(const char* input, char* output, uint32_t len, int variant) { +#if defined(_MSC_VER) + struct cryptonightturtlelite_ctx *ctx = _malloca(sizeof(struct cryptonightturtlelite_ctx)); +#else + struct cryptonightturtlelite_ctx *ctx = alloca(sizeof(struct cryptonightturtlelite_ctx)); +#endif + hash_process(&ctx->state.hs, (const uint8_t*) input, len); + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + memcpy(ctx->aes_key, ctx->state.hs.b, AES_KEY_SIZE); + ctx->aes_ctx = (oaes_ctx*) oaes_alloc(); + size_t i, j; + + VARIANT1_INIT(); + VARIANT2_INIT(ctx->b, ctx->state); + + oaes_key_import_data(ctx->aes_ctx, ctx->aes_key, AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + aesb_pseudo_round(&ctx->text[AES_BLOCK_SIZE * j], + &ctx->text[AES_BLOCK_SIZE * j], + ctx->aes_ctx->key->exp_data); + } + memcpy(&ctx->long_state[i * INIT_SIZE_BYTE], ctx->text, INIT_SIZE_BYTE); + } + + for (i = 0; i < 16; i++) { + ctx->a[i] = ctx->state.k[i] ^ ctx->state.k[32 + i]; + ctx->b[i] = ctx->state.k[16 + i] ^ ctx->state.k[48 + i]; + } + + for (i = 0; i < ITER_DIV; i++) { + /* Dependency chain: address -> read value ------+ + * written value <-+ hard function (AES or MUL) <+ + * next address <-+ + */ + /* Iteration 1 */ + j = e2i(ctx->a); + aesb_single_round(&ctx->long_state[j * AES_BLOCK_SIZE], ctx->c, ctx->a); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + xor_blocks_dst(ctx->c, ctx->b, &ctx->long_state[j * AES_BLOCK_SIZE]); + VARIANT1_1((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + /* Iteration 2 */ + j = e2i(ctx->c); + + uint64_t* dst = (uint64_t*)&ctx->long_state[j * AES_BLOCK_SIZE]; + + uint64_t t[2]; + t[0] = dst[0]; + t[1] = dst[1]; + + VARIANT2_INTEGER_MATH(t, ctx->c); + + uint64_t hi; + uint64_t lo = mul128(((uint64_t*)ctx->c)[0], t[0], &hi); + + VARIANT2_2(); + VARIANT2_SHUFFLE_ADD(ctx->long_state, j * AES_BLOCK_SIZE, ctx->a, ctx->b); + + ((uint64_t*)ctx->a)[0] += hi; + ((uint64_t*)ctx->a)[1] += lo; + + dst[0] = ((uint64_t*)ctx->a)[0]; + dst[1] = ((uint64_t*)ctx->a)[1]; + + ((uint64_t*)ctx->a)[0] ^= t[0]; + ((uint64_t*)ctx->a)[1] ^= t[1]; + + VARIANT1_2((uint8_t*)&ctx->long_state[j * AES_BLOCK_SIZE]); + copy_block(ctx->b + AES_BLOCK_SIZE, ctx->b); + copy_block(ctx->b, ctx->c); + } + + memcpy(ctx->text, ctx->state.init, INIT_SIZE_BYTE); + oaes_key_import_data(ctx->aes_ctx, &ctx->state.hs.b[32], AES_KEY_SIZE); + for (i = 0; i < CN_INIT; i++) { + for (j = 0; j < INIT_SIZE_BLK; j++) { + xor_blocks(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); + aesb_pseudo_round(&ctx->text[j * AES_BLOCK_SIZE], + &ctx->text[j * AES_BLOCK_SIZE], + ctx->aes_ctx->key->exp_data); + } + } + memcpy(ctx->state.init, ctx->text, INIT_SIZE_BYTE); + hash_permutation(&ctx->state.hs); + /*memcpy(hash, &state, 32);*/ + extra_hashes[ctx->state.hs.b[0] & 2](&ctx->state, 200, output); + oaes_free((OAES_CTX **) &ctx->aes_ctx); +} + +void cryptonightturtlelite_fast_hash(const char* input, char* output, uint32_t len) { + union hash_state state; + hash_process(&state, (const uint8_t*) input, len); + memcpy(output, &state, HASH_SIZE); +} diff --git a/src/crypto/flex/cryptonote/cryptonight_turtle_lite.h b/src/crypto/flex/cryptonote/cryptonight_turtle_lite.h new file mode 100644 index 0000000000..28bcd5282b --- /dev/null +++ b/src/crypto/flex/cryptonote/cryptonight_turtle_lite.h @@ -0,0 +1,17 @@ +#ifndef CRYPTONIGHTTURTLELITE_H +#define CRYPTONIGHTTURTLELITE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +void cryptonightturtlelite_hash(const char* input, char* output, uint32_t len, int variant); +void cryptonightturtlelite_fast_hash(const char* input, char* output, uint32_t len); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/flex/flex.cpp b/src/crypto/flex/flex.cpp new file mode 100644 index 0000000000..d76ad37dfb --- /dev/null +++ b/src/crypto/flex/flex.cpp @@ -0,0 +1,311 @@ +#include +#include +#include +#include + +#include "crypto/cn/CnHash.h" +#include "backend/cpu/Cpu.h" + +extern "C" { +#include "../randomx/panthera/sysendian.h" +#include "../ghostrider/sph_blake.h" +#include "../ghostrider/sph_bmw.h" +#include "../ghostrider/sph_groestl.h" +#include "../ghostrider/sph_jh.h" +#include "../ghostrider/sph_keccak.h" +#include "../ghostrider/sph_skein.h" +#include "../ghostrider/sph_luffa.h" +#include "../ghostrider/sph_cubehash.h" +#include "../ghostrider/sph_shavite.h" +#include "../ghostrider/sph_simd.h" +#include "../ghostrider/sph_echo.h" +#include "../ghostrider/sph_hamsi.h" +#include "../ghostrider/sph_fugue.h" +#include "../ghostrider/sph_shabal.h" +#include "../ghostrider/sph_whirlpool.h" +#include "../ghostrider/sph_sha2.h" +#include "cryptonote/cryptonight_dark.h" +#include "cryptonote/cryptonight_dark_lite.h" +#include "cryptonote/cryptonight_fast.h" +#include "cryptonote/cryptonight_lite.h" +#include "cryptonote/cryptonight_turtle.h" +#include "cryptonote/cryptonight_turtle_lite.h" +} +#include + +using namespace xmrig; + +enum Algo { + BLAKE = 0, + BMW, + GROESTL, + KECCAK, + SKEIN, + LUFFA, + CUBEHASH, + SHAVITE, + SIMD, + ECHO, + HAMSI, + FUGUE, + SHABAL, + WHIRLPOOL, + HASH_FUNC_COUNT +}; + +enum CNAlgo { + CNDark = 0, + CNDarklite, + CNFast, + CNLite, + CNTurtle, + CNTurtlelite, + CN_HASH_FUNC_COUNT +}; + + +static void selectAlgo(unsigned char nibble, bool* selectedAlgos, uint8_t* selectedIndex, int algoCount, int* currentCount) { + uint8_t algoDigit = (nibble & 0x0F) % algoCount; + if(!selectedAlgos[algoDigit]) { + selectedAlgos[algoDigit] = true; + selectedIndex[currentCount[0]] = algoDigit; + currentCount[0] = currentCount[0] + 1; + } + algoDigit = (nibble >> 4) % algoCount; + if(!selectedAlgos[algoDigit]) { + selectedAlgos[algoDigit] = true; + selectedIndex[currentCount[0]] = algoDigit; + currentCount[0] = currentCount[0] + 1; + } +} + +static void getAlgoString(void *mem, unsigned int size, uint8_t* selectedAlgoOutput, int algoCount) { + int i; + unsigned char *p = (unsigned char *)mem; + unsigned int len = size/2; + unsigned char j = 0; + bool selectedAlgo[algoCount]; + for(int z=0; z < algoCount; z++) { + selectedAlgo[z] = false; + } + int selectedCount = 0; + for (i=0;ilo; lo++, hi--) + { + char tmp=p[lo]; + p[lo] = p[hi]; + p[hi] = tmp; + } +} + +void flex_hash(const char* input, char* output, cryptonight_ctx** ctx) { + uint32_t hash[64/4]; + sph_blake512_context ctx_blake; + sph_bmw512_context ctx_bmw; + sph_groestl512_context ctx_groestl; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_skein512_context ctx_skein; + sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + sph_hamsi512_context ctx_hamsi; + sph_fugue512_context ctx_fugue; + sph_shabal512_context ctx_shabal; + sph_whirlpool_context ctx_whirlpool; + sph_sha256_context ctx_sha; + hard_coded_eb = 6; + void *in = (void*) input; + int size = 80; + sph_keccak512_init(&ctx_keccak); + sph_keccak512(&ctx_keccak, in, size); + sph_keccak512_close(&ctx_keccak, hash); + uint8_t selectedAlgoOutput[15] = {0}; + uint8_t selectedCNAlgoOutput[14] = {0}; + getAlgoString(&hash, 64, selectedAlgoOutput, 14); + getAlgoString(&hash, 64, selectedCNAlgoOutput, 6); + //printf("previous hash="); + //print_hex_memory(&input[4], 64); + const CnHash::AlgoVariant av = Cpu::info()->hasAES() ? CnHash::AV_SINGLE : CnHash::AV_SINGLE_SOFT; + int i; + for (i = 0; i < 18; i++) + { + uint8_t algo; + uint8_t cnAlgo; + int coreSelection; + int cnSelection = -1; + if(i < 5) { + coreSelection = i; + } else if(i < 11) { + coreSelection = i-1; + } else { + coreSelection = i-2; + } + if(i==5) { + coreSelection = -1; + cnSelection = 0; + } + if(i==11) { + coreSelection = -1; + cnSelection = 1; + } + if(i==17) { + coreSelection = -1; + cnSelection = 2; + } + if(coreSelection >= 0) { + algo = selectedAlgoOutput[(uint8_t)coreSelection]; + } else { + algo = 16; // skip core hashing for this loop iteration + } + if(cnSelection >=0) { + cnAlgo = selectedCNAlgoOutput[(uint8_t)cnSelection]; + } else { + cnAlgo = 14; // skip cn hashing for this loop iteration + } + //selection cnAlgo. if a CN algo is selected then core algo will not be selected + cn_hash_fun f = nullptr; + switch(cnAlgo) + { + case CNDark: + cryptonightdark_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_0, av, Assembly::AUTO); + break; + case CNDarklite: + cryptonightdarklite_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_1, av, Assembly::AUTO); + break; + case CNFast: + cryptonightfast_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_2, av, Assembly::AUTO); + break; + case CNLite: + cryptonightlite_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_3, av, Assembly::AUTO); + break; + case CNTurtle: + cryptonightturtle_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_4, av, Assembly::AUTO); + break; + case CNTurtlelite: + cryptonightturtlelite_hash((const char*)in, (char*)hash, size, 1); + //f = CnHash::fn(Algorithm::CN_GR_5, av, Assembly::AUTO); + break; + default: + f = nullptr; + } + if (f) f((const uint8_t*)in, size, (uint8_t*)hash, ctx, 0); + + //selection core algo + switch (algo) { + case BLAKE: + sph_blake512_init(&ctx_blake); + sph_blake512(&ctx_blake, in, size); + sph_blake512_close(&ctx_blake, hash); + break; + case BMW: + sph_bmw512_init(&ctx_bmw); + sph_bmw512(&ctx_bmw, in, size); + sph_bmw512_close(&ctx_bmw, hash); + break; + case GROESTL: + sph_groestl512_init(&ctx_groestl); + sph_groestl512(&ctx_groestl, in, size); + sph_groestl512_close(&ctx_groestl, hash); + break; + case KECCAK: + sph_keccak512_init(&ctx_keccak); + sph_keccak512(&ctx_keccak, in, size); + sph_keccak512_close(&ctx_keccak, hash); + break; + case SKEIN: + sph_skein512_init(&ctx_skein); + sph_skein512(&ctx_skein, in, size); + sph_skein512_close(&ctx_skein, hash); + break; + case LUFFA: + sph_luffa512_init(&ctx_luffa); + sph_luffa512(&ctx_luffa, in, size); + sph_luffa512_close(&ctx_luffa, hash); + break; + case CUBEHASH: + sph_cubehash512_init(&ctx_cubehash); + sph_cubehash512(&ctx_cubehash, in, size); + sph_cubehash512_close(&ctx_cubehash, hash); + break; + case SHAVITE: + sph_shavite512_init(&ctx_shavite); + sph_shavite512(&ctx_shavite, in, size); + sph_shavite512_close(&ctx_shavite, hash); + break; + case SIMD: + sph_simd512_init(&ctx_simd); + sph_simd512(&ctx_simd, in, size); + sph_simd512_close(&ctx_simd, hash); + break; + case ECHO: + sph_echo512_init(&ctx_echo); + sph_echo512(&ctx_echo, in, size); + sph_echo512_close(&ctx_echo, hash); + break; + case HAMSI: + sph_hamsi512_init(&ctx_hamsi); + sph_hamsi512(&ctx_hamsi, in, size); + sph_hamsi512_close(&ctx_hamsi, hash); + break; + case FUGUE: + sph_fugue512_init(&ctx_fugue); + sph_fugue512(&ctx_fugue, in, size); + sph_fugue512_close(&ctx_fugue, hash); + break; + case SHABAL: + sph_shabal512_init(&ctx_shabal); + sph_shabal512(&ctx_shabal, in, size); + sph_shabal512_close(&ctx_shabal, hash); + break; + case WHIRLPOOL: + sph_whirlpool_init(&ctx_whirlpool); + sph_whirlpool(&ctx_whirlpool, in, size); + sph_whirlpool_close(&ctx_whirlpool, hash); + break; + } + in = (void*) hash; + size = 64; + } + sph_keccak256_init(&ctx_keccak); + sph_keccak256(&ctx_keccak, in, size); + sph_keccak256_close(&ctx_keccak, hash); + memcpy(output, hash, 32); + hard_coded_eb = 1; +} diff --git a/src/crypto/flex/flex.h b/src/crypto/flex/flex.h new file mode 100644 index 0000000000..402ead6614 --- /dev/null +++ b/src/crypto/flex/flex.h @@ -0,0 +1,4 @@ +#pragma once + +struct cryptonight_ctx; +void flex_hash(const char* input, char* output, cryptonight_ctx** ctx); diff --git a/src/net/strategies/DonateStrategy.cpp b/src/net/strategies/DonateStrategy.cpp index 48b21656df..0db8e5d688 100644 --- a/src/net/strategies/DonateStrategy.cpp +++ b/src/net/strategies/DonateStrategy.cpp @@ -54,7 +54,7 @@ xmrig::DonateStrategy::DonateStrategy(Controller *controller, IStrategyListener m_controller(controller), m_listener(listener) { -# ifdef XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER +# if defined(XMRIG_ALGO_KAWPOW) || defined(XMRIG_ALGO_GHOSTRIDER) constexpr Pool::Mode mode = Pool::MODE_AUTO_ETH; # else constexpr Pool::Mode mode = Pool::MODE_POOL; diff --git a/src/version.h b/src/version.h index af5758742a..471de7bd8f 100644 --- a/src/version.h +++ b/src/version.h @@ -22,7 +22,7 @@ #define APP_ID "xmrig" #define APP_NAME "XMRig" #define APP_DESC "XMRig miner" -#define APP_VERSION "6.21.3-mo1" +#define APP_VERSION "6.21.3-mo2" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2024 xmrig.com"