diff --git a/xmrstak/backend/amd/amd_gpu/gpu.cpp b/xmrstak/backend/amd/amd_gpu/gpu.cpp index c664d531b..596ee2127 100644 --- a/xmrstak/backend/amd/amd_gpu/gpu.cpp +++ b/xmrstak/backend/amd/amd_gpu/gpu.cpp @@ -1004,7 +1004,7 @@ size_t XMRSetJob(GpuContext* ctx, uint8_t* input, size_t input_len, uint64_t tar return(ERR_OCL_API); } - if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite) + if(miner_algo == cryptonight_monero || miner_algo == cryptonight_aeon || miner_algo == cryptonight_ipbc || miner_algo == cryptonight_stellite || miner_algo == cryptonight_masari) { // Input if ((ret = clSetKernelArg(ctx->Kernels[kernel_storage][1], 3, sizeof(cl_mem), &ctx->InputBuffer)) != CL_SUCCESS) diff --git a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl index c925c87a3..7bbc3865c 100644 --- a/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl +++ b/xmrstak/backend/amd/amd_gpu/opencl/cryptonight.cl @@ -513,8 +513,8 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, mem_fence(CLK_LOCAL_MEM_FENCE); -// cryptonight_heavy -#if (ALGO == 4) +// cryptonight_heavy or cryptonight_haven +#if (ALGO == 4 || ALGO == 9) __local uint4 xin[8][WORKSIZE]; /* Also left over threads perform this loop. 
@@ -553,8 +553,8 @@ __kernel void JOIN(cn0,ALGO)(__global ulong *input, __global uint4 *Scratchpad, __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states, ulong Threads -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) , __global ulong *input #endif ) @@ -574,8 +574,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states } barrier(CLK_LOCAL_MEM_FENCE); -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) uint2 tweak1_2; #endif uint4 b_x; @@ -599,8 +599,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states b[1] = states[3] ^ states[7]; b_x = ((uint4 *)b)[0]; -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) tweak1_2 = as_uint2(input[4]); tweak1_2.s0 >>= 24; tweak1_2.s0 |= tweak1_2.s1 << 8; @@ -627,8 +627,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states ((uint4 *)c)[0] = AES_Round(AES0, AES1, AES2, AES3, ((uint4 *)c)[0], ((uint4 *)a)[0]); b_x ^= ((uint4 *)c)[0]; -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7) 
+// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) uint table = 0x75310U; // cryptonight_stellite # if(ALGO == 7) @@ -646,8 +646,8 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states a[1] += c[0] * as_ulong2(tmp).s0; a[0] += mul_hi(c[0], as_ulong2(tmp).s0); -// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite -#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7) +// cryptonight_monero || cryptonight_aeon || cryptonight_ipbc || cryptonight_stellite || cryptonight_masari +#if(ALGO == 3 || ALGO == 5 || ALGO == 6 || ALGO == 7 || ALGO == 8) # if(ALGO == 6) uint2 ipbc_tmp = tweak1_2 ^ ((uint2 *)&(a[0]))[0]; @@ -668,6 +668,7 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states idx0 = a[0]; b_x = ((uint4 *)c)[0]; + // cryptonight_heavy #if (ALGO == 4) long n = *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))); @@ -675,6 +676,14 @@ __kernel void JOIN(cn1,ALGO) (__global uint4 *Scratchpad, __global ulong *states long q = n / (d | 0x5); *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n ^ q; idx0 = d ^ q; +#endif +// cryptonight_haven +#if (ALGO == 9) + long n = *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))); + int d = ((__global int*)(Scratchpad + (IDX((idx0 & MASK) >> 4))))[2]; + long q = n / (d | 0x5); + *((__global long*)(Scratchpad + (IDX((idx0 & MASK) >> 4)))) = n ^ q; + idx0 = (~d) ^ q; #endif } } @@ -734,8 +743,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states } barrier(CLK_LOCAL_MEM_FENCE); - -#if (ALGO == 4) +// cryptonight_heavy or cryptonight_haven +#if (ALGO == 4 || ALGO == 9) __local uint4 xin[8][WORKSIZE]; #endif @@ -744,7 +753,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states if(gIdx < Threads) #endif { -#if (ALGO == 4) +// 
cryptonight_heavy or cryptonight_haven +#if (ALGO == 4 || ALGO == 9) #pragma unroll 2 for(int i = 0; i < (MEMORY >> 7); ++i) { @@ -790,8 +800,8 @@ __kernel void JOIN(cn2,ALGO) (__global uint4 *Scratchpad, __global ulong *states #endif } -// cryptonight_heavy -#if (ALGO == 4) +// cryptonight_heavy or cryptonight_haven +#if (ALGO == 4 || ALGO == 9) /* Also left over threads perform this loop. * The left over thread results will be ignored */ diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index e15c474a6..e61381aa4 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -180,7 +180,7 @@ void cn_explode_scratchpad(const __m128i* input, __m128i* output) xin6 = _mm_load_si128(input + 10); xin7 = _mm_load_si128(input + 11); - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { for(size_t i=0; i < 16; i++) { @@ -324,11 +324,11 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); } - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); } - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { for (size_t i = 0; i < MEM / sizeof(__m128i); i += 8) { @@ -375,7 +375,7 @@ void cn_implode_scratchpad(const __m128i* input, __m128i* output) aes_round(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7); } - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7); } @@ -432,7 +432,7 @@ inline void cryptonight_monero_tweak(uint64_t* mem_out, __m128i tmp) uint8_t x = static_cast(vh >> 24); static const uint16_t table = 0x7531; - if(ALGO == cryptonight_monero || 
ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari) { const uint8_t index = (((x >> 3) & 6) | (x & 1)) << 1; vh ^= ((table >> index) & 0x3) << 28; @@ -456,7 +456,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c constexpr size_t ITERATIONS = cn_select_iter(); constexpr size_t MEM = cn_select_memory(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) { memset(output, 0, 32); return; @@ -465,7 +465,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c keccak((const uint8_t *)input, len, ctx0->hash_state, 200); uint64_t monero_const; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { monero_const = *reinterpret_cast(reinterpret_cast(input) + 35); monero_const ^= *(reinterpret_cast(ctx0->hash_state) + 24); @@ -494,7 +494,7 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(ah0, al0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -518,7 +518,7 @@ void 
cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c _mm_prefetch((const char*)&l0[al0 & MASK], _MM_HINT_T0); ah0 += lo; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { if(ALGO == cryptonight_ipbc) ((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ monero_const ^ ((uint64_t*)&l0[idx0 & MASK])[0]; @@ -540,6 +540,15 @@ void cryptonight_hash(const void* input, size_t len, void* output, cryptonight_c ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; idx0 = d ^ q; } + else if(ALGO == cryptonight_haven) + { + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 = (~d) ^ q; + } } // Optim - 90% time boundary @@ -561,7 +570,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter(); constexpr size_t MEM = cn_select_memory(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) { memset(output, 0, 64); return; @@ -571,7 +580,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto keccak((const uint8_t *)input+len, len, ctx[1]->hash_state, 200); uint64_t monero_const_0, monero_const_1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { monero_const_0 = 
*reinterpret_cast(reinterpret_cast(input) + 35); monero_const_0 ^= *(reinterpret_cast(ctx[0]->hash_state) + 24); @@ -609,7 +618,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh0, axl0)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) cryptonight_monero_tweak((uint64_t*)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); else _mm_store_si128((__m128i *)&l0[idx0 & MASK], _mm_xor_si128(bx0, cx)); @@ -627,7 +636,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else cx = _mm_aesenc_si128(cx, _mm_set_epi64x(axh1, axl1)); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) cryptonight_monero_tweak((uint64_t*)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); else _mm_store_si128((__m128i *)&l1[idx1 & MASK], _mm_xor_si128(bx1, cx)); @@ -648,7 +657,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh0 += lo; ((uint64_t*)&l0[idx0 & MASK])[0] = axl0; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { if(ALGO == cryptonight_ipbc) ((uint64_t*)&l0[idx0 & MASK])[1] = axh0 ^ monero_const_0 ^ ((uint64_t*)&l0[idx0 & MASK])[0]; @@ -671,6 +680,15 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; idx0 = d ^ q; } + 
else if(ALGO == cryptonight_haven) + { + int64_t n = ((int64_t*)&l0[idx0 & MASK])[0]; + int32_t d = ((int32_t*)&l0[idx0 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l0[idx0 & MASK])[0] = n ^ q; + idx0 = (~d) ^ q; + } if(PREFETCH) _mm_prefetch((const char*)&l0[idx0 & MASK], _MM_HINT_T0); @@ -684,7 +702,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto axh1 += lo; ((uint64_t*)&l1[idx1 & MASK])[0] = axl1; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { if(ALGO == cryptonight_ipbc) ((uint64_t*)&l1[idx1 & MASK])[1] = axh1 ^ monero_const_1 ^ ((uint64_t*)&l1[idx1 & MASK])[0]; @@ -707,6 +725,15 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; idx1 = d ^ q; } + else if(ALGO == cryptonight_haven) + { + int64_t n = ((int64_t*)&l1[idx1 & MASK])[0]; + int32_t d = ((int32_t*)&l1[idx1 & MASK])[2]; + int64_t q = n / (d | 0x5); + + ((int64_t*)&l1[idx1 & MASK])[0] = n ^ q; + idx1 = (~d) ^ q; + } if(PREFETCH) _mm_prefetch((const char*)&l1[idx1 & MASK], _MM_HINT_T0); @@ -736,7 +763,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else \ c = _mm_aesenc_si128(c, a); \ b = _mm_xor_si128(b, c); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) \ cryptonight_monero_tweak((uint64_t*)ptr, b); \ else \ _mm_store_si128(ptr, b);\ @@ -751,7 +778,7 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto #define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, 
_mm_cvtsi128_si64(b), &hi); \ a = _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) \ + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) \ { \ _mm_store_si128(ptr, _mm_xor_si128(a, mc)); \ if (ALGO == cryptonight_ipbc) \ @@ -768,6 +795,14 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto int64_t q = n / (d | 0x5); \ ((int64_t*)&l[idx & MASK])[0] = n ^ q; \ idx = d ^ q; \ + } \ + else if(ALGO == cryptonight_haven) \ + { \ + int64_t n = ((int64_t*)&l[idx & MASK])[0]; \ + int32_t d = ((int32_t*)&l[idx & MASK])[2]; \ + int64_t q = n / (d | 0x5); \ + ((int64_t*)&l[idx & MASK])[0] = n ^ q; \ + idx = (~d) ^ q; \ } #define CONST_INIT(ctx, n) \ @@ -782,7 +817,7 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto constexpr size_t ITERATIONS = cn_select_iter(); constexpr size_t MEM = cn_select_memory(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) { memset(output, 0, 32 * 3); return; @@ -876,7 +911,7 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni constexpr size_t ITERATIONS = cn_select_iter(); constexpr size_t MEM = cn_select_memory(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) { memset(output, 0, 32 * 4); return; @@ -985,7 +1020,7 @@ void 
cryptonight_penta_hash(const void* input, size_t len, void* output, crypton constexpr size_t ITERATIONS = cn_select_iter(); constexpr size_t MEM = cn_select_memory(); - if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) && len < 43) + if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) && len < 43) { memset(output, 0, 32 * 5); return; diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 482c085e0..7e2a28b36 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -292,6 +292,9 @@ bool minethd::self_test() else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_stellite) { } + else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_masari) + { + } for (int i = 0; i < MAX_N; i++) cryptonight_free_ctx(ctx[i]); @@ -377,6 +380,12 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr case cryptonight_stellite: algv = 6; break; + case cryptonight_masari: + algv = 7; + break; + case cryptonight_haven: + algv = 8; + break; default: algv = 2; break; @@ -410,7 +419,15 @@ minethd::cn_hash_fun minethd::func_selector(bool bHaveAes, bool bNoPrefetch, xmr cryptonight_hash, cryptonight_hash, cryptonight_hash, - cryptonight_hash + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash, + cryptonight_hash }; std::bitset<2> digit; @@ -555,6 +572,12 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, case cryptonight_stellite: algv = 6; break; + case cryptonight_masari: + algv = 7; + break; + case cryptonight_haven: + algv = 8; + break; default: algv = 2; break; @@ -679,6 +702,41 @@ minethd::cn_hash_fun_multi 
minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_double_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_triple_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_quad_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash, + cryptonight_penta_hash + }; std::bitset<2> digit; diff --git a/xmrstak/backend/cryptonight.hpp b/xmrstak/backend/cryptonight.hpp index 633ddf49b..595375610 100644 --- a/xmrstak/backend/cryptonight.hpp +++ b/xmrstak/backend/cryptonight.hpp @@ -12,7 +12,9 @@ enum xmrstak_algo cryptonight_heavy = 4, cryptonight_aeon = 5, cryptonight_ipbc = 6, // equal to cryptonight_aeon with a small tweak in the miner code - cryptonight_stellite = 7 //equal to cryptonight_monero but with one tiny change + cryptonight_stellite = 7, //equal to cryptonight_monero but with one tiny change + cryptonight_masari = 8, //equal to cryptonight_monero but with fewer iterations, used by masari + cryptonight_haven = 9 // equal to cryptonight_heavy with a small tweak }; // define aeon settings @@ -28,6 +30,8 @@ constexpr size_t CRYPTONIGHT_HEAVY_MEMORY = 4 * 1024 * 1024; constexpr uint32_t CRYPTONIGHT_HEAVY_MASK = 0x3FFFF0; constexpr uint32_t CRYPTONIGHT_HEAVY_ITER = 0x40000; +constexpr uint32_t CRYPTONIGHT_MASARI_ITER = 0x40000; + template inline constexpr size_t
cn_select_memory() { return 0; } @@ -52,6 +56,11 @@ inline constexpr size_t cn_select_memory() { return CRYPTONIGH template<> inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_MEMORY; } + +template<> +inline constexpr size_t cn_select_memory() { return CRYPTONIGHT_HEAVY_MEMORY; } inline size_t cn_select_memory(xmrstak_algo algo) { @@ -59,6 +68,7 @@ inline size_t cn_select_memory(xmrstak_algo algo) { case cryptonight_stellite: case cryptonight_monero: + case cryptonight_masari: case cryptonight: return CRYPTONIGHT_MEMORY; case cryptonight_ipbc: @@ -66,6 +76,7 @@ inline size_t cn_select_memory(xmrstak_algo algo) case cryptonight_lite: return CRYPTONIGHT_LITE_MEMORY; case cryptonight_heavy: + case cryptonight_haven: return CRYPTONIGHT_HEAVY_MEMORY; default: return 0; @@ -96,12 +107,20 @@ inline constexpr uint32_t cn_select_mask() { return CRYPTONIGH template<> inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_MASK; } + +template<> +inline constexpr uint32_t cn_select_mask() { return CRYPTONIGHT_HEAVY_MASK; } + + inline size_t cn_select_mask(xmrstak_algo algo) { switch(algo) { case cryptonight_stellite: case cryptonight_monero: + case cryptonight_masari: case cryptonight: return CRYPTONIGHT_MASK; case cryptonight_ipbc: @@ -109,6 +128,7 @@ inline size_t cn_select_mask(xmrstak_algo algo) case cryptonight_lite: return CRYPTONIGHT_LITE_MASK; case cryptonight_heavy: + case cryptonight_haven: return CRYPTONIGHT_HEAVY_MASK; default: return 0; @@ -139,6 +159,13 @@ inline constexpr uint32_t cn_select_iter() { return CRYPTONIGH template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } +template<> +inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_MASARI_ITER; } + +template<> +inline constexpr uint32_t cn_select_iter() { return 
CRYPTONIGHT_HEAVY_ITER; } + + inline size_t cn_select_iter(xmrstak_algo algo) { switch(algo) @@ -152,7 +179,10 @@ inline size_t cn_select_iter(xmrstak_algo algo) case cryptonight_lite: return CRYPTONIGHT_LITE_ITER; case cryptonight_heavy: + case cryptonight_haven: return CRYPTONIGHT_HEAVY_ITER; + case cryptonight_masari: + return CRYPTONIGHT_MASARI_ITER; default: return 0; } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu index 57b6ad071..48243e3b5 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_core.cu @@ -231,7 +231,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti uint32_t t1[2], t2[2], res; uint32_t tweak1_2[2]; - if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if (ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { uint32_t * state = d_ctx_state + thread * 50; tweak1_2[0] = (d_input[8] >> 24) | (d_input[9] << 8); @@ -242,7 +242,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti a = (d_ctx_a + thread * 4)[sub]; idx0 = shuffle<4>(sPtr,sub, a, 0); - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { if(partidx != 0) { @@ -275,10 +275,10 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti t1[0] = shuffle<4>(sPtr,sub, d[x], 0); const uint32_t z = d[0] ^ d[1]; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { const uint32_t table = 0x75310U; - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == 
cryptonight_ipbc) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_masari) { const uint32_t index = ((z >> 26) & 12) | ((z >> 23) & 2); const uint32_t fork_7 = z ^ ((table >> index) & 0x30U) << 24; @@ -312,7 +312,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti res = *( (uint64_t *) t2 ) >> ( sub & 1 ? 32 : 0 ); - if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite) + if(ALGO == cryptonight_monero || ALGO == cryptonight_aeon || ALGO == cryptonight_ipbc || ALGO == cryptonight_stellite || ALGO == cryptonight_masari) { const uint32_t tweaked_res = tweak1_2[sub & 1] ^ res; uint32_t long_state_update = sub2 ? tweaked_res : res; @@ -341,6 +341,17 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti idx0 = d ^ q; } + else if(ALGO == cryptonight_haven) + { + int64_t n = loadGlobal64( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3)); + int32_t d = loadGlobal32( (uint32_t*)(( (uint64_t *) long_state ) + (( idx0 & MASK) >> 3) + 1u )); + int64_t q = n / (d | 0x5); + + if(sub&1) + storeGlobal64( ( (uint64_t *) long_state ) + (( idx0 & MASK ) >> 3), n ^ q ); + + idx0 = (~d) ^ q; + } } } @@ -348,7 +359,7 @@ __global__ void cryptonight_core_gpu_phase2( int threads, int bfactor, int parti { (d_ctx_a + thread * 4)[sub] = a; (d_ctx_b + thread * 4)[sub] = d[1]; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) if(sub&1) *(d_ctx_b + threads * 4 + thread) = idx0; } @@ -394,7 +405,7 @@ __global__ void cryptonight_core_gpu_phase3( int threads, int bfactor, int parti cn_aes_pseudo_round_mut( sharedMemory, text, key ); - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { #pragma unroll for ( int j = 0; j < 4; ++j ) @@ -431,7 +442,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) 
CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase1<<< grid, block8 >>>( ctx->device_blocks*ctx->device_threads, bfactorOneThree, i, ctx->d_long_state, - (ALGO == cryptonight_heavy ? ctx->d_ctx_state2 : ctx->d_ctx_state), + (ALGO == cryptonight_heavy || ALGO == cryptonight_haven ? ctx->d_ctx_state2 : ctx->d_ctx_state), ctx->d_ctx_key1 )); if ( partcount > 1 && ctx->device_bsleep > 0) compat_usleep( ctx->device_bsleep ); @@ -465,7 +476,7 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce) int roundsPhase3 = partcountOneThree; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { // cryptonight_heavy used two full rounds over the scratchpad memory roundsPhase3 *= 2; @@ -515,5 +526,13 @@ void cryptonight_core_cpu_hash(nvid_ctx* ctx, xmrstak_algo miner_algo, uint32_t { cryptonight_core_gpu_hash(ctx, startNonce); } + else if(miner_algo == cryptonight_masari) + { + cryptonight_core_gpu_hash(ctx, startNonce); + } + else if(miner_algo == cryptonight_haven) + { + cryptonight_core_gpu_hash(ctx, startNonce); + } } diff --git a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu index 304997e5c..2cb3702fa 100644 --- a/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu +++ b/xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu @@ -114,7 +114,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric int thread = ( blockDim.x * blockIdx.x + threadIdx.x ); __shared__ uint32_t sharedMemory[1024]; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { cn_aes_gpu_init( sharedMemory ); __syncthreads( ); @@ -148,7 +148,7 @@ __global__ void cryptonight_extra_gpu_prepare( int threads, uint32_t * __restric memcpy( d_ctx_key2 + thread * 40, ctx_key2, 40 * 4 ); memcpy( d_ctx_state + thread * 50, ctx_state, 50 * 4 ); - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { for(int i=0; i < 
16; i++) @@ -172,7 +172,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3 __shared__ uint32_t sharedMemory[1024]; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { cn_aes_gpu_init( sharedMemory ); __syncthreads( ); @@ -189,7 +189,7 @@ __global__ void cryptonight_extra_gpu_final( int threads, uint64_t target, uint3 for ( i = 0; i < 50; i++ ) state[i] = ctx_state[i]; - if(ALGO == cryptonight_heavy) + if(ALGO == cryptonight_heavy || ALGO == cryptonight_haven) { uint32_t key[40]; @@ -287,7 +287,7 @@ extern "C" int cryptonight_extra_cpu_init(nvid_ctx* ctx) size_t wsize = ctx->device_blocks * ctx->device_threads; CUDA_CHECK(ctx->device_id, cudaMalloc(&ctx->d_ctx_state, 50 * sizeof(uint32_t) * wsize)); size_t ctx_b_size = 4 * sizeof(uint32_t) * wsize; - if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) + if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) { // extent ctx_b to hold the state of idx0 ctx_b_size += sizeof(uint32_t) * wsize; @@ -326,6 +326,11 @@ extern "C" void cryptonight_extra_cpu_prepare(nvid_ctx* ctx, uint32_t startNonce CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); } + else if(miner_algo == cryptonight_haven) + { + CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>( wsize, ctx->d_input, ctx->inputlen, startNonce, + ctx->d_ctx_state,ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2 )); + } else { /* pass two times d_ctx_state because the second state is used later in phase1, @@ -355,6 +360,14 @@ extern "C" void cryptonight_extra_cpu_final(nvid_ctx* ctx, uint32_t 
startNonce, cryptonight_extra_gpu_final<<>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) ); } + else if(miner_algo == cryptonight_haven) + { + CUDA_CHECK_MSG_KERNEL( + ctx->device_id, + "\n**suggestion: Try to increase the value of the attribute 'bfactor' in the NVIDIA config file.**", + cryptonight_extra_gpu_final<<>>( wsize, target, ctx->d_result_count, ctx->d_result_nonce, ctx->d_ctx_state,ctx->d_ctx_key2 ) + ); + } else { // fallback for all other algorithms @@ -612,7 +625,7 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) // up to 16kibyte extra memory is used per thread for some kernel (lmem/local memory) // 680bytes are extra meta data memory per hash size_t perThread = hashMemSize + 16192u + 680u; - if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) + if(cryptonight_heavy == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() || cryptonight_haven == ::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo()) perThread += 50 * 4; // state double buffer size_t max_intensity = limitedMemory / perThread; diff --git a/xmrstak/jconf.cpp b/xmrstak/jconf.cpp index b85ddd3cb..c35fb51fd 100644 --- a/xmrstak/jconf.cpp +++ b/xmrstak/jconf.cpp @@ -92,6 +92,8 @@ xmrstak::coin_selection coins[] = { { "bbscoin", {cryptonight_monero, cryptonight, 3u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "croat", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "cryptonight", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, + { "cryptonight_masari", {cryptonight_monero, cryptonight_masari, 255u}, {cryptonight_monero, cryptonight_monero, 0u},nullptr }, + { "cryptonight_haven", {cryptonight_heavy, cryptonight_haven, 255u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, { "cryptonight_heavy", {cryptonight_heavy, 
cryptonight_heavy, 0u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, { "cryptonight_lite", {cryptonight_aeon, cryptonight_lite, 255u}, {cryptonight_aeon, cryptonight_lite, 7u}, nullptr }, { "cryptonight_lite_v7", {cryptonight_lite, cryptonight_aeon, 255u}, {cryptonight_aeon, cryptonight_lite, 7u}, nullptr }, @@ -101,11 +103,11 @@ xmrstak::coin_selection coins[] = { { "edollar", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "electroneum", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "graft", {cryptonight_monero, cryptonight, 8u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, - { "haven", {cryptonight_heavy, cryptonight, 2u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, + { "haven", {cryptonight_haven, cryptonight_heavy, 3u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, { "intense", {cryptonight_monero, cryptonight, 4u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "ipbc", {cryptonight_aeon, cryptonight_ipbc, 255u}, {cryptonight_aeon, cryptonight_aeon, 255u}, nullptr }, { "karbo", {cryptonight_monero, cryptonight, 255u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, - { "masari", {cryptonight_monero, cryptonight, 5u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, + { "masari", {cryptonight_masari, cryptonight_monero, 7u}, {cryptonight_monero, cryptonight_monero, 0u},nullptr }, { "monero7", {cryptonight_monero, cryptonight_monero, 0u}, {cryptonight_monero, cryptonight_monero, 0u}, "pool.usxmrpool.com:3333" }, { "stellite", {cryptonight_stellite, cryptonight_monero, 4u}, {cryptonight_monero, cryptonight_monero, 0u}, nullptr }, { "sumokoin", {cryptonight_heavy, cryptonight_heavy, 0u}, {cryptonight_heavy, cryptonight_heavy, 0u}, nullptr }, diff --git a/xmrstak/net/jpsock.cpp b/xmrstak/net/jpsock.cpp index 6c41f2ba4..9fce9b7e5 100644 --- a/xmrstak/net/jpsock.cpp +++ 
b/xmrstak/net/jpsock.cpp @@ -435,7 +435,7 @@ bool jpsock::process_pool_job(const opq_json_val* params, const uint64_t message const uint32_t iWorkLen = blob->GetStringLength() / 2; oPoolJob.iWorkLen = iWorkLen; - + if (iWorkLen > sizeof(pool_job::bWorkBlob)) return set_socket_error("PARSE error: Invalid job length. Are you sure you are mining the correct coin?"); @@ -487,7 +487,7 @@ bool jpsock::process_pool_job(const opq_json_val* params, const uint64_t message lck.unlock(); // send event after current job data are updated executor::inst()->push_event(ex_event(oPoolJob, pool_id)); - + return true; } @@ -697,6 +697,12 @@ bool jpsock::cmd_submit(const char* sJobId, uint32_t iNonce, const uint8_t* bRes case cryptonight_heavy: algo_name = "cryptonight_heavy"; break; + case cryptonight_haven: + algo_name = "cryptonight_haven"; + break; + case cryptonight_masari: + algo_name = "cryptonight_masari"; + break; default: algo_name = "unknown"; break; diff --git a/xmrstak/pools.tpl b/xmrstak/pools.tpl index 50299cd6a..6966d228f 100644 --- a/xmrstak/pools.tpl +++ b/xmrstak/pools.tpl @@ -26,7 +26,7 @@ POOLCONF], * edollar * electroneum * graft - * haven + * haven (automatic switch with block version 3 to cryptonight_haven) * intense * ipbc * karbo @@ -45,6 +45,7 @@ POOLCONF], * cryptonight * cryptonight_v7 * # 4MiB scratchpad memory + * cryptonight_haven * cryptonight_heavy */ diff --git a/xmrstak/version.cpp b/xmrstak/version.cpp index a680f44d6..71dffcfaf 100644 --- a/xmrstak/version.cpp +++ b/xmrstak/version.cpp @@ -18,7 +18,7 @@ #endif #define XMR_STAK_NAME "xmr-stak" -#define XMR_STAK_VERSION "2.4.4" +#define XMR_STAK_VERSION "2.4.5" #if defined(_WIN32) #define OS_TYPE "win"