From 22c517fa3f89b5f396f0730c64bc7d5317c7226b Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 23 Feb 2019 10:06:12 +0700 Subject: [PATCH 1/8] v2.13.1-dev --- src/version.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/version.h b/src/version.h index 4c5e23fd..b00da651 100644 --- a/src/version.h +++ b/src/version.h @@ -28,7 +28,7 @@ #define APP_ID "xmrig-nvidia" #define APP_NAME "XMRig-NVIDIA" #define APP_DESC "XMRig CUDA miner" -#define APP_VERSION "2.13.0" +#define APP_VERSION "2.13.1-dev" #define APP_DOMAIN "xmrig.com" #define APP_SITE "www.xmrig.com" #define APP_COPYRIGHT "Copyright (C) 2016-2019 xmrig.com" @@ -36,7 +36,7 @@ #define APP_VER_MAJOR 2 #define APP_VER_MINOR 13 -#define APP_VER_PATCH 0 +#define APP_VER_PATCH 1 #ifdef _MSC_VER # if (_MSC_VER >= 1910) From 0e284f78725b66dbc60174ed557a49b9aebeb328 Mon Sep 17 00:00:00 2001 From: XMRig Date: Fri, 1 Mar 2019 23:13:08 +0700 Subject: [PATCH 2/8] Sync changes. --- src/base/io/Json.cpp | 44 +++++++++++++++++++++++++++++++++ src/base/io/Json.h | 6 ++++- src/base/kernel/Entry.cpp | 1 + src/base/net/Pool.cpp | 4 +++ src/common/Platform_unix.cpp | 4 +++ src/common/crypto/Algorithm.cpp | 1 + 6 files changed, 59 insertions(+), 1 deletion(-) diff --git a/src/base/io/Json.cpp b/src/base/io/Json.cpp index ee5211d8..b95994e4 100644 --- a/src/base/io/Json.cpp +++ b/src/base/io/Json.cpp @@ -47,3 +47,47 @@ const char *xmrig::Json::getString(const rapidjson::Value &obj, const char *key, return defaultValue; } + + +int xmrig::Json::getInt(const rapidjson::Value &obj, const char *key, int defaultValue) +{ + auto i = obj.FindMember(key); + if (i != obj.MemberEnd() && i->value.IsInt()) { + return i->value.GetInt(); + } + + return defaultValue; +} + + +int64_t xmrig::Json::getInt64(const rapidjson::Value &obj, const char *key, int64_t defaultValue) +{ + auto i = obj.FindMember(key); + if (i != obj.MemberEnd() && i->value.IsInt64()) { + return i->value.GetInt64(); + } + + return defaultValue; +} + + 
+uint64_t xmrig::Json::getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue) +{ + auto i = obj.FindMember(key); + if (i != obj.MemberEnd() && i->value.IsUint64()) { + return i->value.GetUint64(); + } + + return defaultValue; +} + + +unsigned xmrig::Json::getUint(const rapidjson::Value &obj, const char *key, unsigned defaultValue) +{ + auto i = obj.FindMember(key); + if (i != obj.MemberEnd() && i->value.IsUint()) { + return i->value.GetUint(); + } + + return defaultValue; +} diff --git a/src/base/io/Json.h b/src/base/io/Json.h index c6cde0d8..28dcf9a3 100644 --- a/src/base/io/Json.h +++ b/src/base/io/Json.h @@ -36,7 +36,11 @@ class Json { public: static bool getBool(const rapidjson::Value &obj, const char *key, bool defaultValue = false); - static const char *getString(const rapidjson::Value &obj, const char *key, const char *defaultValue = nullptr); + static const char *getString(const rapidjson::Value &obj, const char *key, const char *defaultValue = nullptr); + static int getInt(const rapidjson::Value &obj, const char *key, int defaultValue = 0); + static int64_t getInt64(const rapidjson::Value &obj, const char *key, int64_t defaultValue = 0); + static uint64_t getUint64(const rapidjson::Value &obj, const char *key, uint64_t defaultValue = 0); + static unsigned getUint(const rapidjson::Value &obj, const char *key, unsigned defaultValue = 0); static bool get(const char *fileName, rapidjson::Document &doc); static bool save(const char *fileName, const rapidjson::Document &doc); diff --git a/src/base/kernel/Entry.cpp b/src/base/kernel/Entry.cpp index abdfd1e1..84c4c971 100644 --- a/src/base/kernel/Entry.cpp +++ b/src/base/kernel/Entry.cpp @@ -23,6 +23,7 @@ */ +#include #include diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index f66a8c9b..dcea9b57 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -45,6 +45,8 @@ #endif +namespace xmrig { + static const char *kEnabled = "enabled"; static const char *kFingerprint = 
"tls-fingerprint"; static const char *kKeepalive = "keepalive"; @@ -56,6 +58,8 @@ static const char *kUrl = "url"; static const char *kUser = "user"; static const char *kVariant = "variant"; +} + xmrig::Pool::Pool() : m_enabled(true), diff --git a/src/common/Platform_unix.cpp b/src/common/Platform_unix.cpp index 901df4be..3066630a 100644 --- a/src/common/Platform_unix.cpp +++ b/src/common/Platform_unix.cpp @@ -61,6 +61,10 @@ char *Platform::createUserAgent() # if defined(__x86_64__) length += snprintf(buf + length, max - length, "x86_64) libuv/%s", uv_version_string()); +# elif defined(__aarch64__) + length += snprintf(buf + length, max - length, "aarch64) libuv/%s", uv_version_string()); +# elif defined(__arm__) + length += snprintf(buf + length, max - length, "arm) libuv/%s", uv_version_string()); # else length += snprintf(buf + length, max - length, "i686) libuv/%s", uv_version_string()); # endif diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index dd864705..7d03acfc 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -113,6 +113,7 @@ static AlgoData const xmrStakAlgorithms[] = { { "cryptonight_alloy", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, // xmr-stak-alloy { "cryptonight_turtle", nullptr, xmrig::CRYPTONIGHT_PICO, xmrig::VARIANT_TRTL }, { "cryptonight_gpu", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_GPU }, + { "cryptonight_r", nullptr, xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, }; #endif From 4ee84fea428b77ee1c47423736c49954733e4d36 Mon Sep 17 00:00:00 2001 From: XMRig Date: Sat, 2 Mar 2019 18:09:18 +0700 Subject: [PATCH 3/8] #241 Increase minimum required CUDA version to 8.0 due CUDA 7.5 lacks support for "nvrtcAddNameExpression". 
--- cmake/CUDA.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/CUDA.cmake b/cmake/CUDA.cmake index 7b19c1c9..9cef12a5 100644 --- a/cmake/CUDA.cmake +++ b/cmake/CUDA.cmake @@ -21,7 +21,7 @@ list(APPEND CMAKE_PREFIX_PATH "$ENV{CUDA_ROOT}") list(APPEND CMAKE_PREFIX_PATH "$ENV{CMAKE_PREFIX_PATH}") set(CUDA_STATIC ON) -find_package(CUDA 7.5 REQUIRED) +find_package(CUDA 8.0 REQUIRED) find_library(CUDA_LIB libcuda cuda HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBCUDA_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64) find_library(CUDA_NVRTC_LIB libnvrtc nvrtc HINTS "${CUDA_TOOLKIT_ROOT_DIR}/lib64" "${LIBNVRTC_LIBRARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/lib/x64" /usr/lib64 /usr/local/cuda/lib64) From 7f80713dfab418d75d2243f7f2df8e1b328c6ab8 Mon Sep 17 00:00:00 2001 From: SChernykh Date: Mon, 4 Mar 2019 22:48:14 +0100 Subject: [PATCH 4/8] CryptoNight v8 ReverseWaltz --- cmake/asm.cmake | 4 +- src/base/net/Pool.cpp | 2 + src/common/crypto/Algorithm.cpp | 2 + src/common/xmrig.h | 1 + src/crypto/CryptoNight.cpp | 95 +- src/crypto/CryptoNight.h | 6 +- src/crypto/CryptoNight_constants.h | 10 +- src/crypto/CryptoNight_monero.h | 44 +- src/crypto/CryptoNight_test.h | 87 +- src/crypto/CryptoNight_x86.h | 177 +- src/crypto/CryptonightR_gen.cpp | 29 +- .../asm/CryptonightR_soft_aes_template.inc | 279 +++ .../CryptonightR_soft_aes_template_win.inc | 279 +++ src/crypto/asm/CryptonightR_template.S | 2 + src/crypto/asm/CryptonightR_template.asm | 2 + src/crypto/asm/CryptonightR_template.h | 24 + src/crypto/asm/CryptonightR_template.inc | 34 +- src/crypto/asm/CryptonightR_template_win.inc | 34 +- .../asm/CryptonightWOW_soft_aes_template.inc | 266 +++ .../CryptonightWOW_soft_aes_template_win.inc | 266 +++ .../asm/cn2/cnv2_rwz_double_main_loop.inc | 410 +++++ src/crypto/asm/cn2/cnv2_rwz_main_loop.inc | 186 ++ src/crypto/asm/cn_main_loop.S | 20 + src/crypto/asm/cn_main_loop.asm | 16 + .../CryptonightR_soft_aes_template_win.inc | 279 
+++ src/crypto/asm/win64/CryptonightR_template.S | 1593 ----------------- .../asm/win64/CryptonightR_template.asm | 2 + src/crypto/asm/win64/CryptonightR_template.h | 1063 ----------- .../asm/win64/CryptonightR_template.inc | 529 ------ .../asm/win64/CryptonightR_template_win.inc | 34 +- .../CryptonightWOW_soft_aes_template_win.inc | 266 +++ .../asm/win64/CryptonightWOW_template.inc | 486 ----- .../win64/cn2/cnv2_rwz_double_main_loop.inc | 410 +++++ .../asm/win64/cn2/cnv2_rwz_main_loop.inc | 186 ++ src/crypto/asm/win64/cn_main_loop.S | 14 + src/crypto/asm/win64/cn_main_loop.asm | 16 + src/nvidia/cuda_core.cu | 12 +- src/nvidia/cuda_extra.cu | 4 +- 38 files changed, 3309 insertions(+), 3860 deletions(-) create mode 100644 src/crypto/asm/CryptonightR_soft_aes_template.inc create mode 100644 src/crypto/asm/CryptonightR_soft_aes_template_win.inc create mode 100644 src/crypto/asm/CryptonightWOW_soft_aes_template.inc create mode 100644 src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc create mode 100644 src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc create mode 100644 src/crypto/asm/cn2/cnv2_rwz_main_loop.inc create mode 100644 src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/win64/CryptonightR_template.S delete mode 100644 src/crypto/asm/win64/CryptonightR_template.h delete mode 100644 src/crypto/asm/win64/CryptonightR_template.inc create mode 100644 src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc delete mode 100644 src/crypto/asm/win64/CryptonightWOW_template.inc create mode 100644 src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc create mode 100644 src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc diff --git a/cmake/asm.cmake b/cmake/asm.cmake index 74fc898f..86463733 100644 --- a/cmake/asm.cmake +++ b/cmake/asm.cmake @@ -23,7 +23,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) if (WIN32 AND CMAKE_C_COMPILER_ID MATCHES GNU) set(XMRIG_ASM_FILES 
"src/crypto/asm/win64/cn_main_loop.S" - "src/crypto/asm/win64/CryptonightR_template.S" + "src/crypto/asm/CryptonightR_template.S" ) else() set(XMRIG_ASM_FILES @@ -36,7 +36,7 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) endif() add_library(${XMRIG_ASM_LIBRARY} STATIC ${XMRIG_ASM_FILES}) - set(XMRIG_ASM_SOURCES "") + set(XMRIG_ASM_SOURCES src/crypto/CryptonightR_gen.cpp) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) else() set(XMRIG_ASM_SOURCES "") diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index dcea9b57..a1fd4331 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -132,6 +132,7 @@ xmrig::Pool::Pool(const rapidjson::Value &object) : xmrig::Pool::Pool(const char *host, uint16_t port, const char *user, const char *password, int keepAlive, bool nicehash, bool tls) : + m_enabled(true), m_nicehash(nicehash), m_tls(tls), m_keepAlive(keepAlive), @@ -483,6 +484,7 @@ void xmrig::Pool::rebuild() m_algorithms.push_back(m_algorithm); # ifndef XMRIG_PROXY_PROJECT + addVariant(VARIANT_RWZ); addVariant(VARIANT_4); addVariant(VARIANT_WOW); addVariant(VARIANT_2); diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index 7d03acfc..ae7ba7c7 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -66,6 +66,7 @@ static AlgoData const algorithms[] = { { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, { "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, + { "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, @@ -133,6 +134,7 @@ static const char *variants[] = { "gpu", "wow", "r", + "rwz" }; diff --git a/src/common/xmrig.h b/src/common/xmrig.h index c6a5f568..575251b7 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ 
-76,6 +76,7 @@ enum Variant { VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) VARIANT_WOW = 12, // CryptoNightR (Wownero) VARIANT_4 = 13, // CryptoNightR (Monero's variant 4) + VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) VARIANT_MAX }; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index 98bbbef8..ed63d257 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -205,6 +205,13 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif # endif cryptonight_single_hash, +# ifdef XMRIG_NO_ASM + cryptonight_single_hash, +# else + cryptonight_single_hash_asm, +# endif + cryptonight_single_hash, + # ifndef XMRIG_NO_AEON cryptonight_single_hash, cryptonight_single_hash, @@ -224,6 +231,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_GPU nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 + nullptr, nullptr, // VARIANT_RWZ # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -232,6 +240,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif # ifndef XMRIG_NO_SUMO @@ -257,6 +266,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_GPU nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 + nullptr, nullptr, // VARIANT_RWZ # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -265,6 +275,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif # ifndef XMRIG_NO_CN_PICO nullptr, nullptr, // VARIANT_0 @@ -288,6 +299,7 @@ 
CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_GPU nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 + nullptr, nullptr, // VARIANT_RWZ #else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -296,6 +308,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, # endif }; @@ -322,27 +335,27 @@ bool CryptoNight::selfTest() { Mem::create(&m_ctx, m_algorithm, 1); if (m_algorithm == xmrig::CRYPTONIGHT) { - if (!verify2(VARIANT_WOW, test_input_WOW)) { - LOG_WARN("CryptonightR (Wownero) self-test failed"); - return false; - } + const bool rc = verify(VARIANT_0, test_output_v0) && + verify(VARIANT_1, test_output_v1) && + verify(VARIANT_2, test_output_v2) && + verify(VARIANT_XTL, test_output_xtl) && + verify(VARIANT_MSR, test_output_msr) && + verify(VARIANT_XAO, test_output_xao) && + verify(VARIANT_RTO, test_output_rto) && + verify(VARIANT_HALF, test_output_half) && + verify2(VARIANT_WOW, test_output_wow) && + verify2(VARIANT_4, test_output_r) && + verify(VARIANT_RWZ, test_output_rwz); - if (!verify2(VARIANT_4, test_input_R)) { - LOG_WARN("CryptonightR self-test failed"); - return false; +# ifndef XMRIG_NO_CN_GPU + if (!rc) { + return rc; } - return verify(VARIANT_0, test_output_v0) && - verify(VARIANT_1, test_output_v1) && - verify(VARIANT_2, test_output_v2) && - verify(VARIANT_XTL, test_output_xtl) && - verify(VARIANT_MSR, test_output_msr) && - verify(VARIANT_XAO, test_output_xao) && - verify(VARIANT_RTO, test_output_rto) && -# ifndef XMRIG_NO_CN_GPU - verify(VARIANT_GPU, test_output_gpu) && -# endif - verify(VARIANT_HALF, test_output_half); + return verify(VARIANT_GPU, test_output_gpu); +# else + return rc; +# endif } # ifndef XMRIG_NO_AEON @@ -388,55 +401,21 @@ bool CryptoNight::verify(xmrig::Variant variant, const 
uint8_t *referenceValue) return memcmp(output, referenceValue, 32) == 0; } -bool CryptoNight::verify2(xmrig::Variant variant, const char *test_data) +bool CryptoNight::verify2(xmrig::Variant variant, const uint8_t *referenceValue) { cn_hash_fun func = fn(variant); if (!func) { return false; } - std::stringstream s(test_data); - std::string expected_hex; - std::string input_hex; - uint64_t height; - while (!s.eof()) - { - uint8_t referenceValue[32]; - uint8_t input[256]; - - s >> expected_hex; - s >> input_hex; - s >> height; - - if ((expected_hex.length() != 64) || (input_hex.length() > 512)) - { - return false; - } - - bool err = false; - - for (int i = 0; i < 32; ++i) - { - referenceValue[i] = (hf_hex2bin(expected_hex[i * 2], err) << 4) + hf_hex2bin(expected_hex[i * 2 + 1], err); - } - - const size_t input_len = input_hex.length() / 2; - for (size_t i = 0; i < input_len; ++i) - { - input[i] = (hf_hex2bin(input_hex[i * 2], err) << 4) + hf_hex2bin(input_hex[i * 2 + 1], err); - } - - if (err) - { - return false; - } - + for (size_t i = 0; i < (sizeof(cn_r_test_input) / sizeof(cn_r_test_input[0])); ++i) { uint8_t hash[32]; - func(input, input_len, hash, &m_ctx, height); - if (memcmp(hash, referenceValue, sizeof(hash)) != 0) - { + func(cn_r_test_input[i].data, cn_r_test_input[i].size, hash, &m_ctx, cn_r_test_input[i].height); + + if (memcmp(hash, referenceValue + i * 32, sizeof hash) != 0) { return false; } } + return true; } diff --git a/src/crypto/CryptoNight.h b/src/crypto/CryptoNight.h index f5fb5a22..aacd9da1 100644 --- a/src/crypto/CryptoNight.h +++ b/src/crypto/CryptoNight.h @@ -65,6 +65,10 @@ struct cryptonight_r_data { struct cryptonight_ctx { alignas(16) uint8_t state[224]; alignas(16) uint8_t *memory; + + uint8_t unused[40]; + const uint32_t* saes_table; + cn_mainloop_fun_ms_abi generated_code; cn_mainloop_double_fun_ms_abi generated_code_double; cryptonight_r_data generated_code_data; @@ -86,7 +90,7 @@ class CryptoNight private: static bool selfTest(); 
static bool verify(xmrig::Variant variant, const uint8_t *referenceValue); - static bool verify2(xmrig::Variant variant, const char *test_data); + static bool verify2(xmrig::Variant variant, const uint8_t *test_data); alignas(16) static cryptonight_ctx *m_ctx; static xmrig::Algo m_algorithm; diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h index 4ea1adb3..a6623eba 100644 --- a/src/crypto/CryptoNight_constants.h +++ b/src/crypto/CryptoNight_constants.h @@ -42,6 +42,7 @@ constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000; constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000; constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000; +constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000; constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000; constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0; @@ -134,6 +135,7 @@ template<> inline constexpr uint32_t cn_select_iter() template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_XAO_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_GPU_ITER; } +template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_WALTZ_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } @@ -158,6 +160,9 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant) case VARIANT_TRTL: return CRYPTONIGHT_TRTL_ITER; + case VARIANT_RWZ: + return CRYPTONIGHT_WALTZ_ITER; + default: break; } @@ -199,11 +204,12 @@ template<> inline constexpr Variant cn_base_variant() { return VA template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } template<> inline constexpr 
Variant cn_base_variant() { return VARIANT_2; } template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } template inline constexpr bool cn_is_cryptonight_r() { return false; } -template<> inline constexpr bool cn_is_cryptonight_r() { return true; } -template<> inline constexpr bool cn_is_cryptonight_r() { return true; } +template<> inline constexpr bool cn_is_cryptonight_r() { return true; } +template<> inline constexpr bool cn_is_cryptonight_r() { return true; } } /* namespace xmrig */ diff --git a/src/crypto/CryptoNight_monero.h b/src/crypto/CryptoNight_monero.h index 26c1fff0..4e84ac5d 100644 --- a/src/crypto/CryptoNight_monero.h +++ b/src/crypto/CryptoNight_monero.h @@ -83,11 +83,11 @@ sqrt_result_xmm_##part = int_sqrt_v2(cx_0 + division_result); \ } while (0) -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \ +# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ do { \ - const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))); \ + const __m128i chunk1 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ - const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ + const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ (reverse ? 
0x10 : 0x30)))); \ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ @@ -96,15 +96,20 @@ } \ } while (0) -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \ +# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ do { \ const __m128i chunk1 = _mm_xor_si128(_mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10))), _mm_set_epi64x(lo, hi)); \ const __m128i chunk2 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20))); \ hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ const __m128i chunk3 = _mm_load_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30))); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ - _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ + if (reverse) { \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk1, _b1)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk3, _b)); \ + } else { \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x10)), _mm_add_epi64(chunk3, _b1)); \ + _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x20)), _mm_add_epi64(chunk1, _b)); \ + } \ _mm_store_si128((__m128i *)((base_ptr) + ((offset) ^ 0x30)), _mm_add_epi64(chunk2, _a)); \ } while (0) @@ -128,11 +133,11 @@ sqrt_result_##part += ((r2 + b > sqrt_input) ? -1 : 0) + ((r2 + (1ULL << 32) < sqrt_input - s) ? 
1 : 0); \ } while (0) -# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c) \ +# define VARIANT2_SHUFFLE(base_ptr, offset, _a, _b, _b1, _c, reverse) \ do { \ - const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))); \ + const uint64x2_t chunk1 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x30 : 0x10)))); \ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ - const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ + const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ (reverse ? 0x10 : 0x30)))); \ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ @@ -141,15 +146,20 @@ } \ } while (0) -# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo) \ +# define VARIANT2_SHUFFLE2(base_ptr, offset, _a, _b, _b1, hi, lo, reverse) \ do { \ const uint64x2_t chunk1 = veorq_u64(vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10))), vcombine_u64(vcreate_u64(hi), vcreate_u64(lo))); \ const uint64x2_t chunk2 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20))); \ hi ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[0]; \ lo ^= ((uint64_t*)((base_ptr) + ((offset) ^ 0x20)))[1]; \ const uint64x2_t chunk3 = vld1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ - vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ + if (reverse) { \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b1))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b))); 
\ + } else { \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x10)), vaddq_u64(chunk3, vreinterpretq_u64_u8(_b1))); \ + vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x20)), vaddq_u64(chunk1, vreinterpretq_u64_u8(_b))); \ + } \ vst1q_u64((uint64_t*)((base_ptr) + ((offset) ^ 0x30)), vaddq_u64(chunk2, vreinterpretq_u64_u8(_a))); \ } while (0) #endif @@ -158,6 +168,16 @@ #define SWAP64LE(x) x #define hash_extra_blake(data, length, hash) blake256_hash((uint8_t*)(hash), (uint8_t*)(data), (length)) +#ifndef NOINLINE +#ifdef __GNUC__ +#define NOINLINE __attribute__ ((noinline)) +#elif _MSC_VER +#define NOINLINE __declspec(noinline) +#else +#define NOINLINE +#endif +#endif + #include "common/xmrig.h" #include "variant4_random_math.h" diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h index 237fe31b..4f10b221 100644 --- a/src/crypto/CryptoNight_test.h +++ b/src/crypto/CryptoNight_test.h @@ -58,27 +58,58 @@ const static uint8_t test_input[380] = { 0xCF, 0x50, 0x29, 0x6A, 0x07, 0x0B, 0x93, 0x8F, 0x8F, 0xA8, 0x10, 0x04 }; -const static char* test_input_WOW = R"===(9d47bf4c41b7e8e727e681715acb47fa1677cdba9ca7bcb05ad8cc8abd5daa66 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260 -0d4a495cb844a3ca8ba4edb8e6bcf829ef1c06d9cdea2b62ca46c2a21b8b0a79 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261 -a1d6d848b5c5915fccd2f64cf216c6b1a02cf7c77bc80d8d4e51b419e88ff0dd 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262 -af3a8544a0221a148c2ac90484b19861e3afca33fe17021efb8ad6496b567915 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263 -313399e0963ae8a99dab8af66d343e097dae0c0feb08dbc43ccdafef5515f413 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264 
-6021c6ef90bff9ae94a7506d623d3a7a86c1756d655f50dd558f716d64622a34 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265 -2b13000535f3db5f9b9b84a65c4351f386cd2cdedebb8c3ad2eab086e6a3fee5 697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266 -fc0e1dad8e895749dc90eb690bc1ba059a1cd772afaaf65a106bf9e5e6b80503 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267 -b60b0afe144deff7d903ed2d5545e77ebe66a3c51fee7016eeb8fee9eb630c0f 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268 -64774b27e7d5fec862fc4c0c13ac6bf09123b6f05bb0e4b75c97f379a2b3a679 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269)==="; - -const static char* test_input_R = R"===(f759588ad57e758467295443a9bd71490abff8e9dad1b95b6bf2f5d0d78387bc 5468697320697320612074657374205468697320697320612074657374205468697320697320612074657374 1806260 -5bb833deca2bdd7252a9ccd7b4ce0b6a4854515794b56c207262f7a5b9bdb566 4c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e73656374657475722061646970697363696e67 1806261 -1ee6728da60fbd8d7d55b2b1ade487a3cf52a2c3ac6f520db12c27d8921f6cab 656c69742c2073656420646f20656975736d6f642074656d706f7220696e6369646964756e74207574206c61626f7265 1806262 -6969fe2ddfb758438d48049f302fc2108a4fcc93e37669170e6db4b0b9b4c4cb 657420646f6c6f7265206d61676e6120616c697175612e20557420656e696d206164206d696e696d2076656e69616d2c 1806263 -7f3048b4e90d0cbe7a57c0394f37338a01fae3adfdc0e5126d863a895eb04e02 71756973206e6f737472756420657865726369746174696f6e20756c6c616d636f206c61626f726973206e697369 1806264 -1d290443a4b542af04a82f6b2494a6ee7f20f2754c58e0849032483a56e8e2ef 757420616c697175697020657820656120636f6d6d6f646f20636f6e7365717561742e20447569732061757465 1806265 -c43cc6567436a86afbd6aa9eaa7c276e9806830334b614b2bee23cc76634f6fd 
697275726520646f6c6f7220696e20726570726568656e646572697420696e20766f6c7570746174652076656c6974 1806266 -87be2479c0c4e8edfdfaa5603e93f4265b3f8224c1c5946feb424819d18990a4 657373652063696c6c756d20646f6c6f726520657520667567696174206e756c6c612070617269617475722e 1806267 -dd9d6a6d8e47465cceac0877ef889b93e7eba979557e3935d7f86dce11b070f3 4578636570746575722073696e74206f6363616563617420637570696461746174206e6f6e2070726f6964656e742c 1806268 -75c6f2ae49a20521de97285b431e717125847fb8935ed84a61e7f8d36a2c3d8e 73756e7420696e2063756c706120717569206f666669636961206465736572756e74206d6f6c6c697420616e696d20696420657374206c61626f72756d2e 1806269)==="; + +struct cn_r_test_input_data +{ + uint64_t height; + size_t size; + uint8_t data[64]; +}; + + +const static cn_r_test_input_data cn_r_test_input[] = { + { 1806260, 44, { 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74 } }, + { 1806261, 50, { 0x4c, 0x6f, 0x72, 0x65, 0x6d, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6d, 0x65, 0x74, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6e, 0x67 } }, + { 1806262, 48, { 0x65, 0x6c, 0x69, 0x74, 0x2c, 0x20, 0x73, 0x65, 0x64, 0x20, 0x64, 0x6f, 0x20, 0x65, 0x69, 0x75, 0x73, 0x6d, 0x6f, 0x64, 0x20, 0x74, 0x65, 0x6d, 0x70, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x63, 0x69, 0x64, 0x69, 0x64, 0x75, 0x6e, 0x74, 0x20, 0x75, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x65 } }, + { 1806263, 48, { 0x65, 0x74, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x6d, 0x61, 0x67, 0x6e, 0x61, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x61, 0x2e, 0x20, 0x55, 0x74, 0x20, 0x65, 0x6e, 0x69, 0x6d, 0x20, 0x61, 0x64, 0x20, 0x6d, 0x69, 0x6e, 0x69, 0x6d, 0x20, 0x76, 0x65, 0x6e, 
0x69, 0x61, 0x6d, 0x2c } }, + { 1806264, 46, { 0x71, 0x75, 0x69, 0x73, 0x20, 0x6e, 0x6f, 0x73, 0x74, 0x72, 0x75, 0x64, 0x20, 0x65, 0x78, 0x65, 0x72, 0x63, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x75, 0x6c, 0x6c, 0x61, 0x6d, 0x63, 0x6f, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x69, 0x73, 0x20, 0x6e, 0x69, 0x73, 0x69 } }, + { 1806265, 45, { 0x75, 0x74, 0x20, 0x61, 0x6c, 0x69, 0x71, 0x75, 0x69, 0x70, 0x20, 0x65, 0x78, 0x20, 0x65, 0x61, 0x20, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x64, 0x6f, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x65, 0x71, 0x75, 0x61, 0x74, 0x2e, 0x20, 0x44, 0x75, 0x69, 0x73, 0x20, 0x61, 0x75, 0x74, 0x65 } }, + { 1806266, 47, { 0x69, 0x72, 0x75, 0x72, 0x65, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x20, 0x69, 0x6e, 0x20, 0x72, 0x65, 0x70, 0x72, 0x65, 0x68, 0x65, 0x6e, 0x64, 0x65, 0x72, 0x69, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x76, 0x6f, 0x6c, 0x75, 0x70, 0x74, 0x61, 0x74, 0x65, 0x20, 0x76, 0x65, 0x6c, 0x69, 0x74 } }, + { 1806267, 44, { 0x65, 0x73, 0x73, 0x65, 0x20, 0x63, 0x69, 0x6c, 0x6c, 0x75, 0x6d, 0x20, 0x64, 0x6f, 0x6c, 0x6f, 0x72, 0x65, 0x20, 0x65, 0x75, 0x20, 0x66, 0x75, 0x67, 0x69, 0x61, 0x74, 0x20, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x20, 0x70, 0x61, 0x72, 0x69, 0x61, 0x74, 0x75, 0x72, 0x2e } }, + { 1806268, 47, { 0x45, 0x78, 0x63, 0x65, 0x70, 0x74, 0x65, 0x75, 0x72, 0x20, 0x73, 0x69, 0x6e, 0x74, 0x20, 0x6f, 0x63, 0x63, 0x61, 0x65, 0x63, 0x61, 0x74, 0x20, 0x63, 0x75, 0x70, 0x69, 0x64, 0x61, 0x74, 0x61, 0x74, 0x20, 0x6e, 0x6f, 0x6e, 0x20, 0x70, 0x72, 0x6f, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x2c } }, + { 1806269, 62, { 0x73, 0x75, 0x6e, 0x74, 0x20, 0x69, 0x6e, 0x20, 0x63, 0x75, 0x6c, 0x70, 0x61, 0x20, 0x71, 0x75, 0x69, 0x20, 0x6f, 0x66, 0x66, 0x69, 0x63, 0x69, 0x61, 0x20, 0x64, 0x65, 0x73, 0x65, 0x72, 0x75, 0x6e, 0x74, 0x20, 0x6d, 0x6f, 0x6c, 0x6c, 0x69, 0x74, 0x20, 0x61, 0x6e, 0x69, 0x6d, 0x20, 0x69, 0x64, 0x20, 0x65, 0x73, 0x74, 0x20, 0x6c, 0x61, 0x62, 0x6f, 0x72, 0x75, 0x6d, 0x2e } }, +}; + + +// "cn/wow" +const static uint8_t test_output_wow[] = { + 0x9d, 0x47, 0xbf, 
0x4c, 0x41, 0xb7, 0xe8, 0xe7, 0x27, 0xe6, 0x81, 0x71, 0x5a, 0xcb, 0x47, 0xfa, 0x16, 0x77, 0xcd, 0xba, 0x9c, 0xa7, 0xbc, 0xb0, 0x5a, 0xd8, 0xcc, 0x8a, 0xbd, 0x5d, 0xaa, 0x66, + 0x0d, 0x4a, 0x49, 0x5c, 0xb8, 0x44, 0xa3, 0xca, 0x8b, 0xa4, 0xed, 0xb8, 0xe6, 0xbc, 0xf8, 0x29, 0xef, 0x1c, 0x06, 0xd9, 0xcd, 0xea, 0x2b, 0x62, 0xca, 0x46, 0xc2, 0xa2, 0x1b, 0x8b, 0x0a, 0x79, + 0xa1, 0xd6, 0xd8, 0x48, 0xb5, 0xc5, 0x91, 0x5f, 0xcc, 0xd2, 0xf6, 0x4c, 0xf2, 0x16, 0xc6, 0xb1, 0xa0, 0x2c, 0xf7, 0xc7, 0x7b, 0xc8, 0x0d, 0x8d, 0x4e, 0x51, 0xb4, 0x19, 0xe8, 0x8f, 0xf0, 0xdd, + 0xaf, 0x3a, 0x85, 0x44, 0xa0, 0x22, 0x1a, 0x14, 0x8c, 0x2a, 0xc9, 0x04, 0x84, 0xb1, 0x98, 0x61, 0xe3, 0xaf, 0xca, 0x33, 0xfe, 0x17, 0x02, 0x1e, 0xfb, 0x8a, 0xd6, 0x49, 0x6b, 0x56, 0x79, 0x15, + 0x31, 0x33, 0x99, 0xe0, 0x96, 0x3a, 0xe8, 0xa9, 0x9d, 0xab, 0x8a, 0xf6, 0x6d, 0x34, 0x3e, 0x09, 0x7d, 0xae, 0x0c, 0x0f, 0xeb, 0x08, 0xdb, 0xc4, 0x3c, 0xcd, 0xaf, 0xef, 0x55, 0x15, 0xf4, 0x13, + 0x60, 0x21, 0xc6, 0xef, 0x90, 0xbf, 0xf9, 0xae, 0x94, 0xa7, 0x50, 0x6d, 0x62, 0x3d, 0x3a, 0x7a, 0x86, 0xc1, 0x75, 0x6d, 0x65, 0x5f, 0x50, 0xdd, 0x55, 0x8f, 0x71, 0x6d, 0x64, 0x62, 0x2a, 0x34, + 0x2b, 0x13, 0x00, 0x05, 0x35, 0xf3, 0xdb, 0x5f, 0x9b, 0x9b, 0x84, 0xa6, 0x5c, 0x43, 0x51, 0xf3, 0x86, 0xcd, 0x2c, 0xde, 0xde, 0xbb, 0x8c, 0x3a, 0xd2, 0xea, 0xb0, 0x86, 0xe6, 0xa3, 0xfe, 0xe5, + 0xfc, 0x0e, 0x1d, 0xad, 0x8e, 0x89, 0x57, 0x49, 0xdc, 0x90, 0xeb, 0x69, 0x0b, 0xc1, 0xba, 0x05, 0x9a, 0x1c, 0xd7, 0x72, 0xaf, 0xaa, 0xf6, 0x5a, 0x10, 0x6b, 0xf9, 0xe5, 0xe6, 0xb8, 0x05, 0x03, + 0xb6, 0x0b, 0x0a, 0xfe, 0x14, 0x4d, 0xef, 0xf7, 0xd9, 0x03, 0xed, 0x2d, 0x55, 0x45, 0xe7, 0x7e, 0xbe, 0x66, 0xa3, 0xc5, 0x1f, 0xee, 0x70, 0x16, 0xee, 0xb8, 0xfe, 0xe9, 0xeb, 0x63, 0x0c, 0x0f, + 0x64, 0x77, 0x4b, 0x27, 0xe7, 0xd5, 0xfe, 0xc8, 0x62, 0xfc, 0x4c, 0x0c, 0x13, 0xac, 0x6b, 0xf0, 0x91, 0x23, 0xb6, 0xf0, 0x5b, 0xb0, 0xe4, 0xb7, 0x5c, 0x97, 0xf3, 0x79, 0xa2, 0xb3, 0xa6, 0x79, +}; + + +// "cn/r" +const static uint8_t test_output_r[] = { + 0xf7, 0x59, 
0x58, 0x8a, 0xd5, 0x7e, 0x75, 0x84, 0x67, 0x29, 0x54, 0x43, 0xa9, 0xbd, 0x71, 0x49, 0x0a, 0xbf, 0xf8, 0xe9, 0xda, 0xd1, 0xb9, 0x5b, 0x6b, 0xf2, 0xf5, 0xd0, 0xd7, 0x83, 0x87, 0xbc, + 0x5b, 0xb8, 0x33, 0xde, 0xca, 0x2b, 0xdd, 0x72, 0x52, 0xa9, 0xcc, 0xd7, 0xb4, 0xce, 0x0b, 0x6a, 0x48, 0x54, 0x51, 0x57, 0x94, 0xb5, 0x6c, 0x20, 0x72, 0x62, 0xf7, 0xa5, 0xb9, 0xbd, 0xb5, 0x66, + 0x1e, 0xe6, 0x72, 0x8d, 0xa6, 0x0f, 0xbd, 0x8d, 0x7d, 0x55, 0xb2, 0xb1, 0xad, 0xe4, 0x87, 0xa3, 0xcf, 0x52, 0xa2, 0xc3, 0xac, 0x6f, 0x52, 0x0d, 0xb1, 0x2c, 0x27, 0xd8, 0x92, 0x1f, 0x6c, 0xab, + 0x69, 0x69, 0xfe, 0x2d, 0xdf, 0xb7, 0x58, 0x43, 0x8d, 0x48, 0x04, 0x9f, 0x30, 0x2f, 0xc2, 0x10, 0x8a, 0x4f, 0xcc, 0x93, 0xe3, 0x76, 0x69, 0x17, 0x0e, 0x6d, 0xb4, 0xb0, 0xb9, 0xb4, 0xc4, 0xcb, + 0x7f, 0x30, 0x48, 0xb4, 0xe9, 0x0d, 0x0c, 0xbe, 0x7a, 0x57, 0xc0, 0x39, 0x4f, 0x37, 0x33, 0x8a, 0x01, 0xfa, 0xe3, 0xad, 0xfd, 0xc0, 0xe5, 0x12, 0x6d, 0x86, 0x3a, 0x89, 0x5e, 0xb0, 0x4e, 0x02, + 0x1d, 0x29, 0x04, 0x43, 0xa4, 0xb5, 0x42, 0xaf, 0x04, 0xa8, 0x2f, 0x6b, 0x24, 0x94, 0xa6, 0xee, 0x7f, 0x20, 0xf2, 0x75, 0x4c, 0x58, 0xe0, 0x84, 0x90, 0x32, 0x48, 0x3a, 0x56, 0xe8, 0xe2, 0xef, + 0xc4, 0x3c, 0xc6, 0x56, 0x74, 0x36, 0xa8, 0x6a, 0xfb, 0xd6, 0xaa, 0x9e, 0xaa, 0x7c, 0x27, 0x6e, 0x98, 0x06, 0x83, 0x03, 0x34, 0xb6, 0x14, 0xb2, 0xbe, 0xe2, 0x3c, 0xc7, 0x66, 0x34, 0xf6, 0xfd, + 0x87, 0xbe, 0x24, 0x79, 0xc0, 0xc4, 0xe8, 0xed, 0xfd, 0xfa, 0xa5, 0x60, 0x3e, 0x93, 0xf4, 0x26, 0x5b, 0x3f, 0x82, 0x24, 0xc1, 0xc5, 0x94, 0x6f, 0xeb, 0x42, 0x48, 0x19, 0xd1, 0x89, 0x90, 0xa4, + 0xdd, 0x9d, 0x6a, 0x6d, 0x8e, 0x47, 0x46, 0x5c, 0xce, 0xac, 0x08, 0x77, 0xef, 0x88, 0x9b, 0x93, 0xe7, 0xeb, 0xa9, 0x79, 0x55, 0x7e, 0x39, 0x35, 0xd7, 0xf8, 0x6d, 0xce, 0x11, 0xb0, 0x70, 0xf3, + 0x75, 0xc6, 0xf2, 0xae, 0x49, 0xa2, 0x05, 0x21, 0xde, 0x97, 0x28, 0x5b, 0x43, 0x1e, 0x71, 0x71, 0x25, 0x84, 0x7f, 0xb8, 0x93, 0x5e, 0xd8, 0x4a, 0x61, 0xe7, 0xf8, 0xd3, 0x6a, 0x2c, 0x3d, 0x8e, +}; + // "cn/0" const static uint8_t test_output_v0[160] = { @@ -199,6 
+230,18 @@ const static uint8_t test_output_rto[160] = { 0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3 }; +const static uint8_t test_output_rwz[160] = { + 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85, + 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8, + 0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52, + 0x49, 0x54, 0x0b, 0x91, 0xea, 0x61, 0x7f, 0x98, 0x7d, 0x39, 0x17, 0xb7, 0xd7, 0x65, 0xff, 0x75, + 0x13, 0x21, 0x1d, 0xce, 0x61, 0x5a, 0xdc, 0x5f, 0x8c, 0xcb, 0x1f, 0x6f, 0xbb, 0x92, 0x88, 0xc3, + 0xe3, 0xe2, 0xfc, 0x4f, 0x62, 0xfb, 0xf0, 0x48, 0x02, 0x01, 0xd3, 0xbe, 0x77, 0x6a, 0x40, 0xca, + 0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0, + 0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87, + 0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5, + 0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f, +}; #ifndef XMRIG_NO_AEON // "cn-lite/0" @@ -295,8 +338,6 @@ const static uint8_t test_output_pico_trtl[160] = { }; #endif -unsigned char hf_hex2bin(char c, bool &err); -char hf_bin2hex(unsigned char c); #ifndef XMRIG_NO_CN_GPU // "cn/gpu" diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 4c5d4ac0..5c4de441 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -192,31 +192,102 @@ static inline void aes_genkey(const __m128i* memory, __m128i* k0, __m128i* k1, _ } +static FORCEINLINE void soft_aesenc(void* __restrict ptr, const void* __restrict key, const uint32_t* __restrict t) +{ + uint32_t x0 = ((const uint32_t*)(ptr))[0]; + uint32_t x1 = ((const uint32_t*)(ptr))[1]; + uint32_t x2 = ((const uint32_t*)(ptr))[2]; + uint32_t x3 = ((const 
uint32_t*)(ptr))[3]; + + uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; + uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; + uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; + uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; + t += 256; + + y0 ^= t[x1 & 0xff]; x1 >>= 8; + y1 ^= t[x2 & 0xff]; x2 >>= 8; + y2 ^= t[x3 & 0xff]; x3 >>= 8; + y3 ^= t[x0 & 0xff]; x0 >>= 8; + t += 256; + + y0 ^= t[x2 & 0xff]; x2 >>= 8; + y1 ^= t[x3 & 0xff]; x3 >>= 8; + y2 ^= t[x0 & 0xff]; x0 >>= 8; + y3 ^= t[x1 & 0xff]; x1 >>= 8; + t += 256; + + y0 ^= t[x3]; + y1 ^= t[x0]; + y2 ^= t[x1]; + y3 ^= t[x2]; + + ((uint32_t*)ptr)[0] = y0 ^ ((uint32_t*)key)[0]; + ((uint32_t*)ptr)[1] = y1 ^ ((uint32_t*)key)[1]; + ((uint32_t*)ptr)[2] = y2 ^ ((uint32_t*)key)[2]; + ((uint32_t*)ptr)[3] = y3 ^ ((uint32_t*)key)[3]; +} + +static FORCEINLINE __m128i soft_aesenc(const void* __restrict ptr, const __m128i key, const uint32_t* __restrict t) +{ + uint32_t x0 = ((const uint32_t*)(ptr))[0]; + uint32_t x1 = ((const uint32_t*)(ptr))[1]; + uint32_t x2 = ((const uint32_t*)(ptr))[2]; + uint32_t x3 = ((const uint32_t*)(ptr))[3]; + + uint32_t y0 = t[x0 & 0xff]; x0 >>= 8; + uint32_t y1 = t[x1 & 0xff]; x1 >>= 8; + uint32_t y2 = t[x2 & 0xff]; x2 >>= 8; + uint32_t y3 = t[x3 & 0xff]; x3 >>= 8; + t += 256; + + y0 ^= t[x1 & 0xff]; x1 >>= 8; + y1 ^= t[x2 & 0xff]; x2 >>= 8; + y2 ^= t[x3 & 0xff]; x3 >>= 8; + y3 ^= t[x0 & 0xff]; x0 >>= 8; + t += 256; + + y0 ^= t[x2 & 0xff]; x2 >>= 8; + y1 ^= t[x3 & 0xff]; x3 >>= 8; + y2 ^= t[x0 & 0xff]; x0 >>= 8; + y3 ^= t[x1 & 0xff]; x1 >>= 8; + + y0 ^= t[x3 + 256]; + y1 ^= t[x0 + 256]; + y2 ^= t[x1 + 256]; + y3 ^= t[x2 + 256]; + + return _mm_xor_si128(_mm_set_epi32(y3, y2, y1, y0), key); +} + template -static inline void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) +void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7); + +template<> +NOINLINE void aes_round(__m128i key, __m128i* 
x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) { - if (SOFT_AES) { - *x0 = soft_aesenc((uint32_t*)x0, key); - *x1 = soft_aesenc((uint32_t*)x1, key); - *x2 = soft_aesenc((uint32_t*)x2, key); - *x3 = soft_aesenc((uint32_t*)x3, key); - *x4 = soft_aesenc((uint32_t*)x4, key); - *x5 = soft_aesenc((uint32_t*)x5, key); - *x6 = soft_aesenc((uint32_t*)x6, key); - *x7 = soft_aesenc((uint32_t*)x7, key); - } - else { - *x0 = _mm_aesenc_si128(*x0, key); - *x1 = _mm_aesenc_si128(*x1, key); - *x2 = _mm_aesenc_si128(*x2, key); - *x3 = _mm_aesenc_si128(*x3, key); - *x4 = _mm_aesenc_si128(*x4, key); - *x5 = _mm_aesenc_si128(*x5, key); - *x6 = _mm_aesenc_si128(*x6, key); - *x7 = _mm_aesenc_si128(*x7, key); - } + *x0 = soft_aesenc((uint32_t*)x0, key, (const uint32_t*)saes_table); + *x1 = soft_aesenc((uint32_t*)x1, key, (const uint32_t*)saes_table); + *x2 = soft_aesenc((uint32_t*)x2, key, (const uint32_t*)saes_table); + *x3 = soft_aesenc((uint32_t*)x3, key, (const uint32_t*)saes_table); + *x4 = soft_aesenc((uint32_t*)x4, key, (const uint32_t*)saes_table); + *x5 = soft_aesenc((uint32_t*)x5, key, (const uint32_t*)saes_table); + *x6 = soft_aesenc((uint32_t*)x6, key, (const uint32_t*)saes_table); + *x7 = soft_aesenc((uint32_t*)x7, key, (const uint32_t*)saes_table); } +template<> +FORCEINLINE void aes_round(__m128i key, __m128i* x0, __m128i* x1, __m128i* x2, __m128i* x3, __m128i* x4, __m128i* x5, __m128i* x6, __m128i* x7) +{ + *x0 = _mm_aesenc_si128(*x0, key); + *x1 = _mm_aesenc_si128(*x1, key); + *x2 = _mm_aesenc_si128(*x2, key); + *x3 = _mm_aesenc_si128(*x3, key); + *x4 = _mm_aesenc_si128(*x4, key); + *x5 = _mm_aesenc_si128(*x5, key); + *x6 = _mm_aesenc_si128(*x6, key); + *x7 = _mm_aesenc_si128(*x7, key); +} inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3, __m128i& x4, __m128i& x5, __m128i& x6, __m128i& x7) { @@ -460,7 +531,7 @@ template static inline void cryptonight_monero_tweak(uint64_t* mem_out, const 
uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i bx1, __m128i& cx) { if (BASE == xmrig::VARIANT_2) { - VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx); + VARIANT2_SHUFFLE(l, idx, ax0, bx0, bx1, cx, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); _mm_store_si128((__m128i *)mem_out, _mm_xor_si128(bx0, cx)); } else { __m128i tmp = _mm_xor_si128(bx0, cx); @@ -478,6 +549,8 @@ static inline void cryptonight_monero_tweak(uint64_t* mem_out, const uint8_t* l, } } +void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); +void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); template inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height) @@ -498,9 +571,31 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si cn_explode_scratchpad((__m128i*) ctx[0]->state, (__m128i*) ctx[0]->memory); - const uint8_t* l0 = ctx[0]->memory; uint64_t* h0 = reinterpret_cast(ctx[0]->state); +#ifndef XMRIG_NO_ASM + if (SOFT_AES && xmrig::cn_is_cryptonight_r()) + { + if (!ctx[0]->generated_code_data.match(VARIANT, height)) { + V4_Instruction code[256]; + const int code_size = v4_random_math_init(code, height); + + if (VARIANT == xmrig::VARIANT_WOW) + wow_soft_aes_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); + else if (VARIANT == xmrig::VARIANT_4) + v4_soft_aes_compile_code(code, code_size, reinterpret_cast(ctx[0]->generated_code), xmrig::ASM_NONE); + + ctx[0]->generated_code_data.variant = VARIANT; + ctx[0]->generated_code_data.height = height; + } + + ctx[0]->saes_table = (const uint32_t*)saes_table; + ctx[0]->generated_code(ctx[0]); + } else { +#endif + + const uint8_t* l0 = ctx[0]->memory; + VARIANT1_INIT(0); VARIANT2_INIT(0); VARIANT2_SET_ROUNDING_MODE(); @@ -524,7 +619,7 @@ inline void 
cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si cx = aes_round_tweak_div(cx, ax0); } else if (SOFT_AES) { - cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); + cx = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); } else { cx = _mm_aesenc_si128(cx, ax0); @@ -558,9 +653,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si if (BASE == xmrig::VARIANT_2) { if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx); + VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx0, bx1, cx, 0); } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo); + VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx0, bx1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); } } @@ -602,6 +697,10 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si bx0 = cx; } +#ifndef XMRIG_NO_ASM + } +#endif + cn_implode_scratchpad((__m128i*) ctx[0]->memory, (__m128i*) ctx[0]->state); xmrig::keccakf(h0, 24); @@ -655,6 +754,8 @@ extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx *ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx *ctx); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); +extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx *ctx); +extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx* ctx0, cryptonight_ctx* ctx1); extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm; @@ -744,6 +845,9 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ cn_trtl_mainloop_bulldozer_asm(ctx[0]); } } + else if (VARIANT == xmrig::VARIANT_RWZ) { + cnv2_rwz_mainloop_asm(ctx[0]); + } else if (xmrig::cn_is_cryptonight_r()) { ctx[0]->generated_code(ctx[0]); } @@ -782,6 +886,9 @@ inline void cryptonight_double_hash_asm(const 
uint8_t *__restrict__ input, size_ else if (VARIANT == xmrig::VARIANT_TRTL) { cn_trtl_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); } + else if (VARIANT == xmrig::VARIANT_RWZ) { + cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]); + } else if (xmrig::cn_is_cryptonight_r()) { ctx[0]->generated_code_double(ctx[0], ctx[1]); } @@ -857,8 +964,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si cx1 = aes_round_tweak_div(cx1, ax1); } else if (SOFT_AES) { - cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0); - cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1); + cx0 = soft_aesenc((uint32_t*)&l0[idx0 & MASK], ax0, (const uint32_t*)saes_table); + cx1 = soft_aesenc((uint32_t*)&l1[idx1 & MASK], ax1, (const uint32_t*)saes_table); } else { cx0 = _mm_aesenc_si128(cx0, ax0); @@ -896,9 +1003,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si if (BASE == xmrig::VARIANT_2) { if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0); + VARIANT2_SHUFFLE(l0, idx0 & MASK, ax0, bx00, bx01, cx0, 0); } else { - VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo); + VARIANT2_SHUFFLE2(l0, idx0 & MASK, ax0, bx00, bx01, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); } } @@ -952,9 +1059,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si if (BASE == xmrig::VARIANT_2) { if (VARIANT == xmrig::VARIANT_4) { - VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1); + VARIANT2_SHUFFLE(l1, idx1 & MASK, ax1, bx10, bx11, cx1, 0); } else { - VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo); + VARIANT2_SHUFFLE2(l1, idx1 & MASK, ax1, bx10, bx11, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 
1 : 0)); } } @@ -1019,7 +1126,7 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si c = aes_round_tweak_div(c, a); \ } \ else if (SOFT_AES) { \ - c = soft_aesenc(c, a); \ + c = soft_aesenc(&c, a, (const uint32_t*)saes_table); \ } else { \ c = _mm_aesenc_si128(c, a); \ } \ @@ -1056,9 +1163,9 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si lo = __umul128(idx, cl##part, &hi); \ if (BASE == xmrig::VARIANT_2) { \ if (VARIANT == xmrig::VARIANT_4) { \ - VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c); \ + VARIANT2_SHUFFLE(l, idx & MASK, a, b0, b1, c, 0); \ } else { \ - VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo); \ + VARIANT2_SHUFFLE2(l, idx & MASK, a, b0, b1, hi, lo, (VARIANT == xmrig::VARIANT_RWZ ? 1 : 0)); \ } \ } \ if (VARIANT == xmrig::VARIANT_4) { \ diff --git a/src/crypto/CryptonightR_gen.cpp b/src/crypto/CryptonightR_gen.cpp index 55f94662..3fba49cd 100644 --- a/src/crypto/CryptonightR_gen.cpp +++ b/src/crypto/CryptonightR_gen.cpp @@ -31,7 +31,6 @@ typedef void(*void_func)(); #include "crypto/asm/CryptonightR_template.h" #include "Mem.h" -#if !defined XMRIG_ARM && !defined XMRIG_NO_ASM static inline void add_code(uint8_t* &p, void (*p1)(), void (*p2)()) { @@ -159,4 +158,30 @@ void v4_compile_code_double(const V4_Instruction* code, int code_size, void* mac Mem::flushInstructionCache(machine_code, p - p0); } -#endif +void wow_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) +{ + uint8_t* p0 = reinterpret_cast(machine_code); + uint8_t* p = p0; + + add_code(p, CryptonightWOW_soft_aes_template_part1, CryptonightWOW_soft_aes_template_part2); + add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); + add_code(p, CryptonightWOW_soft_aes_template_part2, CryptonightWOW_soft_aes_template_part3); + *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightWOW_soft_aes_template_mainloop) - ((const 
uint8_t*)CryptonightWOW_soft_aes_template_part1)) - (p - p0)); + add_code(p, CryptonightWOW_soft_aes_template_part3, CryptonightWOW_soft_aes_template_end); + + Mem::flushInstructionCache(machine_code, p - p0); +} + +void v4_soft_aes_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM) +{ + uint8_t* p0 = reinterpret_cast(machine_code); + uint8_t* p = p0; + + add_code(p, CryptonightR_soft_aes_template_part1, CryptonightR_soft_aes_template_part2); + add_random_math(p, code, code_size, instructions, instructions_mov, false, ASM); + add_code(p, CryptonightR_soft_aes_template_part2, CryptonightR_soft_aes_template_part3); + *(int*)(p - 4) = static_cast((((const uint8_t*)CryptonightR_soft_aes_template_mainloop) - ((const uint8_t*)CryptonightR_soft_aes_template_part1)) - (p - p0)); + add_code(p, CryptonightR_soft_aes_template_part3, CryptonightR_soft_aes_template_end); + + Mem::flushInstructionCache(machine_code, p - p0); +} diff --git a/src/crypto/asm/CryptonightR_soft_aes_template.inc b/src/crypto/asm/CryptonightR_soft_aes_template.inc new file mode 100644 index 00000000..40c7874d --- /dev/null +++ b/src/crypto/asm/CryptonightR_soft_aes_template.inc @@ -0,0 +1,279 @@ +PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part1) +PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_mainloop) +PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part2) +PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_part3) +PUBLIC FN_PREFIX(CryptonightR_soft_aes_template_end) + +ALIGN(64) +FN_PREFIX(CryptonightR_soft_aes_template_part1): + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] 
+ mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movq xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movq xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movq xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movq xmm10, QWORD PTR [r10+96] + movq xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movq xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movq xmm13, rcx + mov r12d, 524288 + + ALIGN(64) +FN_PREFIX(CryptonightR_soft_aes_template_mainloop): + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, DWORD PTR [r13] + movq xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movq xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + 
movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movq r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + pxor xmm6, xmm1 + pxor xmm6, xmm0 + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movq rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movq rdi, xmm6 + mov r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, [rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + movaps xmm0, xmm5 + psrldq xmm0, 8 + movd r9d, xmm0 + +FN_PREFIX(CryptonightR_soft_aes_template_part2): + mov rsp, r10 + mov [rsp+144], ebx + mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov edi, edi + shl rbp, 32 + or rbp, rdi + xor r8, rbp + + mov ebx, ebx + shl rsi, 32 + or rsi, rbx + xor QWORD PTR [rsp+320], rsi + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, 
[rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm1 + paddq xmm1, xmm7 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm6, xmm0 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 + add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne FN_PREFIX(CryptonightR_soft_aes_template_mainloop) + +FN_PREFIX(CryptonightR_soft_aes_template_part3): + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +FN_PREFIX(CryptonightR_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc new file mode 100644 index 00000000..d771f69c --- /dev/null +++ b/src/crypto/asm/CryptonightR_soft_aes_template_win.inc @@ -0,0 +1,279 @@ +PUBLIC CryptonightR_soft_aes_template_part1 +PUBLIC CryptonightR_soft_aes_template_mainloop +PUBLIC CryptonightR_soft_aes_template_part2 +PUBLIC CryptonightR_soft_aes_template_part3 +PUBLIC CryptonightR_soft_aes_template_end + +ALIGN(64) +CryptonightR_soft_aes_template_part1: + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 
232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movq xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movq xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movq xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movq xmm10, QWORD PTR [r10+96] + movq xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movq xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movq xmm13, rcx + mov r12d, 524288 + + ALIGN(64) +CryptonightR_soft_aes_template_mainloop: + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, DWORD PTR [r13] + movq xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movq xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 
256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movq r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + pxor xmm6, xmm1 + pxor xmm6, xmm0 + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movq rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movq rdi, xmm6 + mov r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, [rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + movaps xmm0, xmm5 + psrldq xmm0, 8 + movd r9d, xmm0 + +CryptonightR_soft_aes_template_part2: + mov rsp, r10 + mov [rsp+144], ebx + 
mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov edi, edi + shl rbp, 32 + or rbp, rdi + xor r8, rbp + + mov ebx, ebx + shl rsi, 32 + or rsi, rbx + xor QWORD PTR [rsp+320], rsi + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, [rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm1 + paddq xmm1, xmm7 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm6, xmm0 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 + add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne CryptonightR_soft_aes_template_mainloop + +CryptonightR_soft_aes_template_part3: + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/CryptonightR_template.S b/src/crypto/asm/CryptonightR_template.S index 5f3046cb..d2974d16 100644 --- a/src/crypto/asm/CryptonightR_template.S +++ b/src/crypto/asm/CryptonightR_template.S @@ -531,6 +531,8 @@ PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) #include "CryptonightWOW_template.inc" #include "CryptonightR_template.inc" +#include "CryptonightWOW_soft_aes_template.inc" +#include 
"CryptonightR_soft_aes_template.inc" FN_PREFIX(CryptonightR_instruction0): imul rbx, rbx diff --git a/src/crypto/asm/CryptonightR_template.asm b/src/crypto/asm/CryptonightR_template.asm index 25b72c3c..250eca3d 100644 --- a/src/crypto/asm/CryptonightR_template.asm +++ b/src/crypto/asm/CryptonightR_template.asm @@ -518,6 +518,8 @@ PUBLIC CryptonightR_instruction_mov256 INCLUDE CryptonightWOW_template_win.inc INCLUDE CryptonightR_template_win.inc +INCLUDE CryptonightWOW_soft_aes_template_win.inc +INCLUDE CryptonightR_soft_aes_template_win.inc CryptonightR_instruction0: imul rbx, rbx diff --git a/src/crypto/asm/CryptonightR_template.h b/src/crypto/asm/CryptonightR_template.h index c2054705..d9159a8f 100644 --- a/src/crypto/asm/CryptonightR_template.h +++ b/src/crypto/asm/CryptonightR_template.h @@ -26,6 +26,30 @@ extern "C" void CryptonightR_template_double_part4(); void CryptonightR_template_double_end(); + void CryptonightWOW_soft_aes_template_part1(); + void CryptonightWOW_soft_aes_template_mainloop(); + void CryptonightWOW_soft_aes_template_part2(); + void CryptonightWOW_soft_aes_template_part3(); + void CryptonightWOW_soft_aes_template_end(); + void CryptonightWOW_soft_aes_template_double_part1(); + void CryptonightWOW_soft_aes_template_double_mainloop(); + void CryptonightWOW_soft_aes_template_double_part2(); + void CryptonightWOW_soft_aes_template_double_part3(); + void CryptonightWOW_soft_aes_template_double_part4(); + void CryptonightWOW_soft_aes_template_double_end(); + + void CryptonightR_soft_aes_template_part1(); + void CryptonightR_soft_aes_template_mainloop(); + void CryptonightR_soft_aes_template_part2(); + void CryptonightR_soft_aes_template_part3(); + void CryptonightR_soft_aes_template_end(); + void CryptonightR_soft_aes_template_double_part1(); + void CryptonightR_soft_aes_template_double_mainloop(); + void CryptonightR_soft_aes_template_double_part2(); + void CryptonightR_soft_aes_template_double_part3(); + void 
CryptonightR_soft_aes_template_double_part4(); + void CryptonightR_soft_aes_template_double_end(); + void CryptonightR_instruction0(); void CryptonightR_instruction1(); void CryptonightR_instruction2(); diff --git a/src/crypto/asm/CryptonightR_template.inc b/src/crypto/asm/CryptonightR_template.inc index b54486a5..8ecab724 100644 --- a/src/crypto/asm/CryptonightR_template.inc +++ b/src/crypto/asm/CryptonightR_template.inc @@ -70,29 +70,30 @@ FN_PREFIX(CryptonightR_template_mainloop): aesenc xmm5, xmm4 - mov r12d, r9d + mov r13d, r9d mov eax, r9d xor r9d, 48 - xor r12d, 16 + xor r13d, 16 xor eax, 32 movdqu xmm0, XMMWORD PTR [r9+r11] movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm2, XMMWORD PTR [r13+r11] movdqu xmm1, XMMWORD PTR [rax+r11] pxor xmm0, xmm2 pxor xmm5, xmm1 pxor xmm5, xmm0 + + movq r12, xmm5 + movd r10d, xmm5 + and r10d, 2097136 + paddq xmm3, xmm7 paddq xmm2, xmm6 paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r13+r11], xmm3 movdqu XMMWORD PTR [rax+r11], xmm2 movdqu XMMWORD PTR [r9+r11], xmm1 - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - movdqa xmm0, xmm5 pxor xmm0, xmm6 movdqu XMMWORD PTR [rdx], xmm0 @@ -102,14 +103,16 @@ FN_PREFIX(CryptonightR_template_mainloop): shl rdx, 32 or r13, rdx - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - movd eax, xmm6 movd edx, xmm7 pextrd r9d, xmm7, 2 + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + FN_PREFIX(CryptonightR_template_part2): + lea rcx, [r10+r11] + mov eax, edi mov edx, ebp shl rdx, 32 @@ -124,6 +127,8 @@ FN_PREFIX(CryptonightR_template_part2): mov rax, r13 mul r12 + add r15, rax + add rsp, rdx mov r9d, r10d mov r12d, r10d @@ -145,13 +150,10 @@ FN_PREFIX(CryptonightR_template_part2): movdqu XMMWORD PTR [r10+r11], xmm3 movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp + mov QWORD PTR [rcx], rsp xor rsp, r13 mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 + mov QWORD PTR 
[rcx+8], r15 and r9d, 2097136 xor r15, r14 movdqa xmm6, xmm5 diff --git a/src/crypto/asm/CryptonightR_template_win.inc b/src/crypto/asm/CryptonightR_template_win.inc index 150bb0e3..a170f2d2 100644 --- a/src/crypto/asm/CryptonightR_template_win.inc +++ b/src/crypto/asm/CryptonightR_template_win.inc @@ -70,29 +70,30 @@ CryptonightR_template_mainloop: aesenc xmm5, xmm4 - mov r12d, r9d + mov r13d, r9d mov eax, r9d xor r9d, 48 - xor r12d, 16 + xor r13d, 16 xor eax, 32 movdqu xmm0, XMMWORD PTR [r9+r11] movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm2, XMMWORD PTR [r13+r11] movdqu xmm1, XMMWORD PTR [rax+r11] pxor xmm0, xmm2 pxor xmm5, xmm1 pxor xmm5, xmm0 + + movq r12, xmm5 + movd r10d, xmm5 + and r10d, 2097136 + paddq xmm3, xmm7 paddq xmm2, xmm6 paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r13+r11], xmm3 movdqu XMMWORD PTR [rax+r11], xmm2 movdqu XMMWORD PTR [r9+r11], xmm1 - movq r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - movdqa xmm0, xmm5 pxor xmm0, xmm6 movdqu XMMWORD PTR [rdx], xmm0 @@ -102,14 +103,16 @@ CryptonightR_template_mainloop: shl rdx, 32 or r13, rdx - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - movd eax, xmm6 movd edx, xmm7 pextrd r9d, xmm7, 2 + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + CryptonightR_template_part2: + lea rcx, [r10+r11] + mov eax, edi mov edx, ebp shl rdx, 32 @@ -124,6 +127,8 @@ CryptonightR_template_part2: mov rax, r13 mul r12 + add r15, rax + add rsp, rdx mov r9d, r10d mov r12d, r10d @@ -145,13 +150,10 @@ CryptonightR_template_part2: movdqu XMMWORD PTR [r10+r11], xmm3 movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp + mov QWORD PTR [rcx], rsp xor rsp, r13 mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 + mov QWORD PTR [rcx+8], r15 and r9d, 2097136 xor r15, r14 movdqa xmm6, xmm5 diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc new 
file mode 100644 index 00000000..feea3949 --- /dev/null +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template.inc @@ -0,0 +1,266 @@ +PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part1) +PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) +PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part2) +PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_part3) +PUBLIC FN_PREFIX(CryptonightWOW_soft_aes_template_end) + +ALIGN(64) +FN_PREFIX(CryptonightWOW_soft_aes_template_part1): + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movq xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movq xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movq xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movq xmm10, QWORD PTR [r10+96] + movq xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movq xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movq xmm13, rcx + mov r12d, 524288 + + ALIGN(64) +FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop): + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, 
DWORD PTR [r13] + movq xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movq xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movq r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movq rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movq rdi, xmm6 + mov r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, 
[rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + +FN_PREFIX(CryptonightWOW_soft_aes_template_part2): + mov rsp, r10 + mov [rsp+144], ebx + mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, [rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + paddq xmm1, xmm7 + movq xmm0, rax + movq xmm3, rdx + xor rax, QWORD PTR [r11+rcx+8] + xor rdx, QWORD PTR [rcx+r11] + punpcklqdq xmm3, xmm0 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm2, xmm3 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 + add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne FN_PREFIX(CryptonightWOW_soft_aes_template_mainloop) + +FN_PREFIX(CryptonightWOW_soft_aes_template_part3): + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop 
r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +FN_PREFIX(CryptonightWOW_soft_aes_template_end): diff --git a/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc new file mode 100644 index 00000000..6ebad99f --- /dev/null +++ b/src/crypto/asm/CryptonightWOW_soft_aes_template_win.inc @@ -0,0 +1,266 @@ +PUBLIC CryptonightWOW_soft_aes_template_part1 +PUBLIC CryptonightWOW_soft_aes_template_mainloop +PUBLIC CryptonightWOW_soft_aes_template_part2 +PUBLIC CryptonightWOW_soft_aes_template_part3 +PUBLIC CryptonightWOW_soft_aes_template_end + +ALIGN(64) +CryptonightWOW_soft_aes_template_part1: + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movq xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movq xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movq xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movq xmm10, QWORD PTR [r10+96] + movq xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movq xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movq xmm13, rcx + mov r12d, 
524288 + + ALIGN(64) +CryptonightWOW_soft_aes_template_mainloop: + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, DWORD PTR [r13] + movq xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movq xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movq r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movq rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movq rdi, xmm6 + mov 
r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, [rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + +CryptonightWOW_soft_aes_template_part2: + mov rsp, r10 + mov [rsp+144], ebx + mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, [rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + paddq xmm1, xmm7 + movq xmm0, rax + movq xmm3, rdx + xor rax, QWORD PTR [r11+rcx+8] + xor rdx, QWORD PTR [rcx+r11] + punpcklqdq xmm3, xmm0 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm2, xmm3 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 + add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne CryptonightWOW_soft_aes_template_mainloop + +CryptonightWOW_soft_aes_template_part3: + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps 
xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc new file mode 100644 index 00000000..d2d87173 --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_rwz_double_main_loop.inc @@ -0,0 +1,410 @@ + mov rax, rsp + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 184 + + stmxcsr DWORD PTR [rsp+272] + mov DWORD PTR [rsp+276], 24448 + ldmxcsr DWORD PTR [rsp+276] + + mov r13, QWORD PTR [rcx+224] + mov r9, rdx + mov r10, QWORD PTR [rcx+32] + mov r8, rcx + xor r10, QWORD PTR [rcx] + mov r14d, 393216 + mov r11, QWORD PTR [rcx+40] + xor r11, QWORD PTR [rcx+8] + mov rsi, QWORD PTR [rdx+224] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov rdi, QWORD PTR [r9+32] + xor rdi, QWORD PTR [r9] + mov rbp, QWORD PTR [r9+40] + xor rbp, QWORD PTR [r9+8] + movq xmm0, rdx + movaps XMMWORD PTR [rax-88], xmm6 + movaps XMMWORD PTR [rax-104], xmm7 + movaps XMMWORD PTR [rax-120], xmm8 + movaps XMMWORD PTR [rsp+112], xmm9 + movaps XMMWORD PTR [rsp+96], xmm10 + movaps XMMWORD PTR [rsp+80], xmm11 + movaps XMMWORD PTR [rsp+64], xmm12 + movaps XMMWORD PTR [rsp+48], xmm13 + movaps XMMWORD PTR [rsp+32], xmm14 + movaps XMMWORD PTR [rsp+16], xmm15 + mov rdx, r10 + movq xmm4, QWORD PTR [r8+96] + and edx, 2097136 + mov rax, QWORD PTR [rcx+48] + xorps xmm13, xmm13 + xor rax, QWORD PTR [rcx+16] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r8+72] + movq xmm5, QWORD PTR [r8+104] + movq xmm7, rax + + mov eax, 1 + shl rax, 52 + movq xmm14, rax + punpcklqdq xmm14, xmm14 + + mov eax, 1023 + shl rax, 52 + movq xmm12, rax + punpcklqdq xmm12, xmm12 + + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + punpcklqdq xmm7, xmm0 + movq xmm0, rcx + mov rcx, QWORD PTR [r9+56] + xor rcx, 
QWORD PTR [r9+24] + movq xmm3, rax + mov rax, QWORD PTR [r9+48] + xor rax, QWORD PTR [r9+16] + punpcklqdq xmm3, xmm0 + movq xmm0, rcx + mov QWORD PTR [rsp], r13 + mov rcx, QWORD PTR [r9+88] + xor rcx, QWORD PTR [r9+72] + movq xmm6, rax + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + punpcklqdq xmm6, xmm0 + movq xmm0, rcx + mov QWORD PTR [rsp+256], r10 + mov rcx, rdi + mov QWORD PTR [rsp+264], r11 + movq xmm8, rax + and ecx, 2097136 + punpcklqdq xmm8, xmm0 + movq xmm0, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movq xmm0, QWORD PTR [r9+104] + lea r8, QWORD PTR [rcx+rsi] + movdqu xmm11, XMMWORD PTR [r8] + punpcklqdq xmm5, xmm0 + lea r9, QWORD PTR [rdx+r13] + movdqu xmm15, XMMWORD PTR [r9] + + ALIGN(64) +rwz_main_loop_double: + movdqu xmm9, xmm15 + mov eax, edx + mov ebx, edx + xor eax, 16 + xor ebx, 32 + xor edx, 48 + + movq xmm0, r11 + movq xmm2, r10 + punpcklqdq xmm2, xmm0 + aesenc xmm9, xmm2 + + movdqu xmm0, XMMWORD PTR [rdx+r13] + movdqu xmm1, XMMWORD PTR [rbx+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [rbx+r13], xmm0 + movdqu xmm0, XMMWORD PTR [rax+r13] + movdqu XMMWORD PTR [rdx+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [rax+r13], xmm0 + + movq r11, xmm9 + mov edx, r11d + and edx, 2097136 + movdqa xmm0, xmm9 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [r9], xmm0 + + lea rbx, QWORD PTR [rdx+r13] + mov r10, QWORD PTR [rdx+r13] + + movdqu xmm10, xmm11 + movq xmm0, rbp + movq xmm11, rdi + punpcklqdq xmm11, xmm0 + aesenc xmm10, xmm11 + + mov eax, ecx + mov r12d, ecx + xor eax, 16 + xor r12d, 32 + xor ecx, 48 + + movdqu xmm0, XMMWORD PTR [rcx+rsi] + paddq xmm0, xmm6 + movdqu xmm1, XMMWORD PTR [r12+rsi] + movdqu XMMWORD PTR [r12+rsi], xmm0 + paddq xmm1, xmm11 + movdqu xmm0, XMMWORD PTR [rax+rsi] + movdqu XMMWORD PTR [rcx+rsi], xmm1 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [rax+rsi], xmm0 + + movq rcx, xmm10 + and ecx, 2097136 + + movdqa xmm0, xmm10 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [r8], xmm0 + mov r12, QWORD PTR [rcx+rsi] + + 
mov r9, QWORD PTR [rbx+8] + + xor edx, 16 + mov r8d, edx + mov r15d, edx + + movq rdx, xmm5 + shl rdx, 32 + movq rax, xmm4 + xor rdx, rax + xor r10, rdx + mov rax, r10 + mul r11 + mov r11d, r8d + xor r11d, 48 + movq xmm0, rdx + xor rdx, [r11+r13] + movq xmm1, rax + xor rax, [r11+r13+8] + punpcklqdq xmm0, xmm1 + + pxor xmm0, XMMWORD PTR [r8+r13] + movdqu xmm1, XMMWORD PTR [r11+r13] + paddq xmm0, xmm3 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [r8+r13], xmm0 + xor r8d, 32 + movdqu xmm0, XMMWORD PTR [r8+r13] + movdqu XMMWORD PTR [r8+r13], xmm1 + paddq xmm0, xmm7 + movdqu XMMWORD PTR [r11+r13], xmm0 + + mov r11, QWORD PTR [rsp+256] + add r11, rdx + mov rdx, QWORD PTR [rsp+264] + add rdx, rax + mov QWORD PTR [rbx], r11 + xor r11, r10 + mov QWORD PTR [rbx+8], rdx + xor rdx, r9 + mov QWORD PTR [rsp+256], r11 + and r11d, 2097136 + mov QWORD PTR [rsp+264], rdx + mov QWORD PTR [rsp+8], r11 + lea r15, QWORD PTR [r11+r13] + movdqu xmm15, XMMWORD PTR [r11+r13] + lea r13, QWORD PTR [rsi+rcx] + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movaps xmm2, xmm13 + movq r10, xmm0 + psllq xmm5, 1 + shl r10, 32 + movdqa xmm0, xmm9 + psrldq xmm0, 8 + movdqa xmm1, xmm10 + movq r11, xmm0 + psrldq xmm1, 8 + movq r8, xmm1 + psrldq xmm4, 8 + movaps xmm0, xmm13 + movq rax, xmm4 + xor r10, rax + movaps xmm1, xmm13 + xor r10, r12 + lea rax, QWORD PTR [r11+1] + shr rax, 1 + movdqa xmm3, xmm9 + punpcklqdq xmm3, xmm10 + paddq xmm5, xmm3 + movq rdx, xmm5 + psrldq xmm5, 8 + cvtsi2sd xmm2, rax + or edx, -2147483647 + lea rax, QWORD PTR [r8+1] + shr rax, 1 + movq r9, xmm5 + cvtsi2sd xmm0, rax + or r9d, -2147483647 + cvtsi2sd xmm1, rdx + unpcklpd xmm2, xmm0 + movaps xmm0, xmm13 + cvtsi2sd xmm0, r9 + unpcklpd xmm1, xmm0 + divpd xmm2, xmm1 + paddq xmm2, xmm14 + cvttsd2si rax, xmm2 + psrldq xmm2, 8 + mov rbx, rax + imul rax, rdx + sub r11, rax + js rwz_div_fix_1 +rwz_div_fix_1_ret: + + cvttsd2si rdx, xmm2 + mov rax, rdx + imul rax, r9 + movd xmm2, r11d + movd xmm4, ebx + sub r8, rax + js rwz_div_fix_2 
+rwz_div_fix_2_ret: + + movd xmm1, r8d + movd xmm0, edx + punpckldq xmm2, xmm1 + punpckldq xmm4, xmm0 + punpckldq xmm4, xmm2 + paddq xmm3, xmm4 + movdqa xmm0, xmm3 + psrlq xmm0, 12 + paddq xmm0, xmm12 + sqrtpd xmm1, xmm0 + movq r9, xmm1 + movdqa xmm5, xmm1 + psrlq xmm5, 19 + test r9, 524287 + je rwz_sqrt_fix_1 +rwz_sqrt_fix_1_ret: + + movq r9, xmm10 + psrldq xmm1, 8 + movq r8, xmm1 + test r8, 524287 + je rwz_sqrt_fix_2 +rwz_sqrt_fix_2_ret: + + mov r12d, ecx + mov r8d, ecx + xor r12d, 16 + xor r8d, 32 + xor ecx, 48 + mov rax, r10 + mul r9 + movq xmm0, rax + movq xmm3, rdx + punpcklqdq xmm3, xmm0 + + movdqu xmm0, XMMWORD PTR [r12+rsi] + pxor xmm0, xmm3 + movdqu xmm1, XMMWORD PTR [r8+rsi] + xor rdx, [r8+rsi] + xor rax, [r8+rsi+8] + movdqu xmm3, XMMWORD PTR [rcx+rsi] + paddq xmm3, xmm6 + paddq xmm1, xmm11 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [r8+rsi], xmm3 + movdqu XMMWORD PTR [rcx+rsi], xmm1 + movdqu XMMWORD PTR [r12+rsi], xmm0 + + add rdi, rdx + mov QWORD PTR [r13], rdi + xor rdi, r10 + mov ecx, edi + and ecx, 2097136 + lea r8, QWORD PTR [rcx+rsi] + + mov rdx, QWORD PTR [r13+8] + add rbp, rax + mov QWORD PTR [r13+8], rbp + movdqu xmm11, XMMWORD PTR [rcx+rsi] + xor rbp, rdx + mov r13, QWORD PTR [rsp] + movdqa xmm3, xmm7 + mov rdx, QWORD PTR [rsp+8] + movdqa xmm8, xmm6 + mov r10, QWORD PTR [rsp+256] + movdqa xmm7, xmm9 + mov r11, QWORD PTR [rsp+264] + movdqa xmm6, xmm10 + mov r9, r15 + dec r14d + jne rwz_main_loop_double + + ldmxcsr DWORD PTR [rsp+272] + movaps xmm13, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+184] + movaps xmm6, XMMWORD PTR [r11-24] + movaps xmm7, XMMWORD PTR [r11-40] + movaps xmm8, XMMWORD PTR [r11-56] + movaps xmm9, XMMWORD PTR [r11-72] + movaps xmm10, XMMWORD PTR [r11-88] + movaps xmm11, XMMWORD PTR [r11-104] + movaps xmm12, XMMWORD PTR [r11-120] + movaps xmm14, XMMWORD PTR [rsp+32] + movaps xmm15, XMMWORD PTR [rsp+16] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + jmp 
rwz_cnv2_double_mainloop_asm_endp + +rwz_div_fix_1: + dec rbx + add r11, rdx + jmp rwz_div_fix_1_ret + +rwz_div_fix_2: + dec rdx + add r8, r9 + jmp rwz_div_fix_2_ret + +rwz_sqrt_fix_1: + movq r8, xmm3 + movdqa xmm0, xmm5 + psrldq xmm0, 8 + dec r9 + mov r11d, -1022 + shl r11, 32 + mov rax, r9 + shr r9, 19 + shr rax, 20 + mov rdx, r9 + sub rdx, rax + lea rdx, [rdx+r11+1] + add rax, r11 + imul rdx, rax + sub rdx, r8 + adc r9, 0 + movq xmm5, r9 + punpcklqdq xmm5, xmm0 + jmp rwz_sqrt_fix_1_ret + +rwz_sqrt_fix_2: + psrldq xmm3, 8 + movq r11, xmm3 + dec r8 + mov ebx, -1022 + shl rbx, 32 + mov rax, r8 + shr r8, 19 + shr rax, 20 + mov rdx, r8 + sub rdx, rax + lea rdx, [rdx+rbx+1] + add rax, rbx + imul rdx, rax + sub rdx, r11 + adc r8, 0 + movq xmm0, r8 + punpcklqdq xmm5, xmm0 + jmp rwz_sqrt_fix_2_ret + +rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc new file mode 100644 index 00000000..021f787e --- /dev/null +++ b/src/crypto/asm/cn2/cnv2_rwz_main_loop.inc @@ -0,0 +1,186 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 393216 + mov r8, QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movq xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movq xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movq xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movq xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov ax, 
1023 + shl rax, 52 + movq xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movq xmm0, rcx + punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] + + ALIGN(64) +rwz_main_loop: + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movq xmm0, r11 + movq xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movq rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm0, XMMWORD PTR [rcx+rbx] + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm2, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + paddq xmm1, xmm7 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [rax+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movq rcx, xmm3 + mov rax, rcx + shl rax, 32 + xor rdi, rax + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movq rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movq xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movq rdx, xmm3 + test edx, 524287 + je rwz_sqrt_fixup + psrlq xmm3, 19 +rwz_sqrt_fixup_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movq xmm2, rdx + xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 + movq xmm0, rax + xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm4 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm5 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], xmm2 + movdqa xmm4, xmm6 
+ movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [r10+rbx], xmm1 + movdqu xmm6, [rdi+rbx] + mov r10d, edi + xor r11, r12 + dec rsi + jne rwz_main_loop + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp cnv2_rwz_main_loop_endp + +rwz_sqrt_fixup: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movq xmm3, rdx + jmp rwz_sqrt_fixup_ret + +cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/cn_main_loop.S b/src/crypto/asm/cn_main_loop.S index a792337f..c2c08739 100644 --- a/src/crypto/asm/cn_main_loop.S +++ b/src/crypto/asm/cn_main_loop.S @@ -15,6 +15,8 @@ .global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) +.global FN_PREFIX(cnv2_rwz_mainloop_asm) +.global FN_PREFIX(cnv2_rwz_double_mainloop_asm) ALIGN(64) FN_PREFIX(cnv2_mainloop_ivybridge_asm): @@ -52,3 +54,21 @@ FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): add rsp, 48 ret 0 mov eax, 3735929054 + +ALIGN(64) +FN_PREFIX(cnv2_rwz_mainloop_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_rwz_main_loop.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 + +ALIGN(64) +FN_PREFIX(cnv2_rwz_double_mainloop_asm): + sub rsp, 48 + mov rcx, rdi + #include "cn2/cnv2_rwz_double_main_loop.inc" + add rsp, 48 + ret 0 + mov eax, 3735929054 diff --git a/src/crypto/asm/cn_main_loop.asm b/src/crypto/asm/cn_main_loop.asm index f1384be8..f0766a7c 100644 --- a/src/crypto/asm/cn_main_loop.asm +++ b/src/crypto/asm/cn_main_loop.asm @@ -3,6 +3,8 @@ PUBLIC cnv2_mainloop_ivybridge_asm PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC 
cnv2_double_mainloop_sandybridge_asm +PUBLIC cnv2_rwz_mainloop_asm +PUBLIC cnv2_rwz_double_mainloop_asm ALIGN(64) cnv2_mainloop_ivybridge_asm PROC @@ -32,5 +34,19 @@ cnv2_double_mainloop_sandybridge_asm PROC mov eax, 3735929054 cnv2_double_mainloop_sandybridge_asm ENDP +ALIGN(64) +cnv2_rwz_mainloop_asm PROC + INCLUDE cn2/cnv2_rwz_main_loop.inc + ret 0 + mov eax, 3735929054 +cnv2_rwz_mainloop_asm ENDP + +ALIGN(64) +cnv2_rwz_double_mainloop_asm PROC + INCLUDE cn2/cnv2_rwz_double_main_loop.inc + ret 0 + mov eax, 3735929054 +cnv2_rwz_double_mainloop_asm ENDP + _TEXT_CNV2_MAINLOOP ENDS END diff --git a/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc new file mode 100644 index 00000000..d6d393a9 --- /dev/null +++ b/src/crypto/asm/win64/CryptonightR_soft_aes_template_win.inc @@ -0,0 +1,279 @@ +PUBLIC CryptonightR_soft_aes_template_part1 +PUBLIC CryptonightR_soft_aes_template_mainloop +PUBLIC CryptonightR_soft_aes_template_part2 +PUBLIC CryptonightR_soft_aes_template_part3 +PUBLIC CryptonightR_soft_aes_template_end + +ALIGN(64) +CryptonightR_soft_aes_template_part1: + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movd xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movd xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + 
movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movd xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movd xmm10, QWORD PTR [r10+96] + movd xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movd xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movd xmm13, rcx + mov r12d, 524288 + + ALIGN(64) +CryptonightR_soft_aes_template_mainloop: + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, DWORD PTR [r13] + movd xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movd xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movd r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor 
eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + pxor xmm6, xmm1 + pxor xmm6, xmm0 + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movd rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movd rdi, xmm6 + mov r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, [rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + movaps xmm0, xmm5 + psrldq xmm0, 8 + movd r9d, xmm0 + +CryptonightR_soft_aes_template_part2: + mov rsp, r10 + mov [rsp+144], ebx + mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov edi, edi + shl rbp, 32 + or rbp, rdi + xor r8, rbp + + mov ebx, ebx + shl rsi, 32 + or rsi, rbx + xor QWORD PTR [rsp+320], rsi + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, [rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm2 + pxor xmm6, xmm1 + paddq xmm1, xmm7 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm6, xmm0 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 
+ add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne CryptonightR_soft_aes_template_mainloop + +CryptonightR_soft_aes_template_part3: + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +CryptonightR_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightR_template.S b/src/crypto/asm/win64/CryptonightR_template.S deleted file mode 100644 index 5f3046cb..00000000 --- a/src/crypto/asm/win64/CryptonightR_template.S +++ /dev/null @@ -1,1593 +0,0 @@ -#ifdef __APPLE__ -# define ALIGN(x) .align 6 -#else -# define ALIGN(x) .align 64 -#endif -.intel_syntax noprefix -#ifdef __APPLE__ -# define FN_PREFIX(fn) _ ## fn -.text -#else -# define FN_PREFIX(fn) fn -.section .text -#endif - -#define PUBLIC .global - -PUBLIC FN_PREFIX(CryptonightR_instruction0) -PUBLIC FN_PREFIX(CryptonightR_instruction1) -PUBLIC FN_PREFIX(CryptonightR_instruction2) -PUBLIC FN_PREFIX(CryptonightR_instruction3) -PUBLIC FN_PREFIX(CryptonightR_instruction4) -PUBLIC FN_PREFIX(CryptonightR_instruction5) -PUBLIC FN_PREFIX(CryptonightR_instruction6) -PUBLIC FN_PREFIX(CryptonightR_instruction7) -PUBLIC FN_PREFIX(CryptonightR_instruction8) -PUBLIC FN_PREFIX(CryptonightR_instruction9) -PUBLIC FN_PREFIX(CryptonightR_instruction10) -PUBLIC FN_PREFIX(CryptonightR_instruction11) -PUBLIC FN_PREFIX(CryptonightR_instruction12) -PUBLIC FN_PREFIX(CryptonightR_instruction13) -PUBLIC 
FN_PREFIX(CryptonightR_instruction14) -PUBLIC FN_PREFIX(CryptonightR_instruction15) -PUBLIC FN_PREFIX(CryptonightR_instruction16) -PUBLIC FN_PREFIX(CryptonightR_instruction17) -PUBLIC FN_PREFIX(CryptonightR_instruction18) -PUBLIC FN_PREFIX(CryptonightR_instruction19) -PUBLIC FN_PREFIX(CryptonightR_instruction20) -PUBLIC FN_PREFIX(CryptonightR_instruction21) -PUBLIC FN_PREFIX(CryptonightR_instruction22) -PUBLIC FN_PREFIX(CryptonightR_instruction23) -PUBLIC FN_PREFIX(CryptonightR_instruction24) -PUBLIC FN_PREFIX(CryptonightR_instruction25) -PUBLIC FN_PREFIX(CryptonightR_instruction26) -PUBLIC FN_PREFIX(CryptonightR_instruction27) -PUBLIC FN_PREFIX(CryptonightR_instruction28) -PUBLIC FN_PREFIX(CryptonightR_instruction29) -PUBLIC FN_PREFIX(CryptonightR_instruction30) -PUBLIC FN_PREFIX(CryptonightR_instruction31) -PUBLIC FN_PREFIX(CryptonightR_instruction32) -PUBLIC FN_PREFIX(CryptonightR_instruction33) -PUBLIC FN_PREFIX(CryptonightR_instruction34) -PUBLIC FN_PREFIX(CryptonightR_instruction35) -PUBLIC FN_PREFIX(CryptonightR_instruction36) -PUBLIC FN_PREFIX(CryptonightR_instruction37) -PUBLIC FN_PREFIX(CryptonightR_instruction38) -PUBLIC FN_PREFIX(CryptonightR_instruction39) -PUBLIC FN_PREFIX(CryptonightR_instruction40) -PUBLIC FN_PREFIX(CryptonightR_instruction41) -PUBLIC FN_PREFIX(CryptonightR_instruction42) -PUBLIC FN_PREFIX(CryptonightR_instruction43) -PUBLIC FN_PREFIX(CryptonightR_instruction44) -PUBLIC FN_PREFIX(CryptonightR_instruction45) -PUBLIC FN_PREFIX(CryptonightR_instruction46) -PUBLIC FN_PREFIX(CryptonightR_instruction47) -PUBLIC FN_PREFIX(CryptonightR_instruction48) -PUBLIC FN_PREFIX(CryptonightR_instruction49) -PUBLIC FN_PREFIX(CryptonightR_instruction50) -PUBLIC FN_PREFIX(CryptonightR_instruction51) -PUBLIC FN_PREFIX(CryptonightR_instruction52) -PUBLIC FN_PREFIX(CryptonightR_instruction53) -PUBLIC FN_PREFIX(CryptonightR_instruction54) -PUBLIC FN_PREFIX(CryptonightR_instruction55) -PUBLIC FN_PREFIX(CryptonightR_instruction56) -PUBLIC 
FN_PREFIX(CryptonightR_instruction57) -PUBLIC FN_PREFIX(CryptonightR_instruction58) -PUBLIC FN_PREFIX(CryptonightR_instruction59) -PUBLIC FN_PREFIX(CryptonightR_instruction60) -PUBLIC FN_PREFIX(CryptonightR_instruction61) -PUBLIC FN_PREFIX(CryptonightR_instruction62) -PUBLIC FN_PREFIX(CryptonightR_instruction63) -PUBLIC FN_PREFIX(CryptonightR_instruction64) -PUBLIC FN_PREFIX(CryptonightR_instruction65) -PUBLIC FN_PREFIX(CryptonightR_instruction66) -PUBLIC FN_PREFIX(CryptonightR_instruction67) -PUBLIC FN_PREFIX(CryptonightR_instruction68) -PUBLIC FN_PREFIX(CryptonightR_instruction69) -PUBLIC FN_PREFIX(CryptonightR_instruction70) -PUBLIC FN_PREFIX(CryptonightR_instruction71) -PUBLIC FN_PREFIX(CryptonightR_instruction72) -PUBLIC FN_PREFIX(CryptonightR_instruction73) -PUBLIC FN_PREFIX(CryptonightR_instruction74) -PUBLIC FN_PREFIX(CryptonightR_instruction75) -PUBLIC FN_PREFIX(CryptonightR_instruction76) -PUBLIC FN_PREFIX(CryptonightR_instruction77) -PUBLIC FN_PREFIX(CryptonightR_instruction78) -PUBLIC FN_PREFIX(CryptonightR_instruction79) -PUBLIC FN_PREFIX(CryptonightR_instruction80) -PUBLIC FN_PREFIX(CryptonightR_instruction81) -PUBLIC FN_PREFIX(CryptonightR_instruction82) -PUBLIC FN_PREFIX(CryptonightR_instruction83) -PUBLIC FN_PREFIX(CryptonightR_instruction84) -PUBLIC FN_PREFIX(CryptonightR_instruction85) -PUBLIC FN_PREFIX(CryptonightR_instruction86) -PUBLIC FN_PREFIX(CryptonightR_instruction87) -PUBLIC FN_PREFIX(CryptonightR_instruction88) -PUBLIC FN_PREFIX(CryptonightR_instruction89) -PUBLIC FN_PREFIX(CryptonightR_instruction90) -PUBLIC FN_PREFIX(CryptonightR_instruction91) -PUBLIC FN_PREFIX(CryptonightR_instruction92) -PUBLIC FN_PREFIX(CryptonightR_instruction93) -PUBLIC FN_PREFIX(CryptonightR_instruction94) -PUBLIC FN_PREFIX(CryptonightR_instruction95) -PUBLIC FN_PREFIX(CryptonightR_instruction96) -PUBLIC FN_PREFIX(CryptonightR_instruction97) -PUBLIC FN_PREFIX(CryptonightR_instruction98) -PUBLIC FN_PREFIX(CryptonightR_instruction99) -PUBLIC 
FN_PREFIX(CryptonightR_instruction100) -PUBLIC FN_PREFIX(CryptonightR_instruction101) -PUBLIC FN_PREFIX(CryptonightR_instruction102) -PUBLIC FN_PREFIX(CryptonightR_instruction103) -PUBLIC FN_PREFIX(CryptonightR_instruction104) -PUBLIC FN_PREFIX(CryptonightR_instruction105) -PUBLIC FN_PREFIX(CryptonightR_instruction106) -PUBLIC FN_PREFIX(CryptonightR_instruction107) -PUBLIC FN_PREFIX(CryptonightR_instruction108) -PUBLIC FN_PREFIX(CryptonightR_instruction109) -PUBLIC FN_PREFIX(CryptonightR_instruction110) -PUBLIC FN_PREFIX(CryptonightR_instruction111) -PUBLIC FN_PREFIX(CryptonightR_instruction112) -PUBLIC FN_PREFIX(CryptonightR_instruction113) -PUBLIC FN_PREFIX(CryptonightR_instruction114) -PUBLIC FN_PREFIX(CryptonightR_instruction115) -PUBLIC FN_PREFIX(CryptonightR_instruction116) -PUBLIC FN_PREFIX(CryptonightR_instruction117) -PUBLIC FN_PREFIX(CryptonightR_instruction118) -PUBLIC FN_PREFIX(CryptonightR_instruction119) -PUBLIC FN_PREFIX(CryptonightR_instruction120) -PUBLIC FN_PREFIX(CryptonightR_instruction121) -PUBLIC FN_PREFIX(CryptonightR_instruction122) -PUBLIC FN_PREFIX(CryptonightR_instruction123) -PUBLIC FN_PREFIX(CryptonightR_instruction124) -PUBLIC FN_PREFIX(CryptonightR_instruction125) -PUBLIC FN_PREFIX(CryptonightR_instruction126) -PUBLIC FN_PREFIX(CryptonightR_instruction127) -PUBLIC FN_PREFIX(CryptonightR_instruction128) -PUBLIC FN_PREFIX(CryptonightR_instruction129) -PUBLIC FN_PREFIX(CryptonightR_instruction130) -PUBLIC FN_PREFIX(CryptonightR_instruction131) -PUBLIC FN_PREFIX(CryptonightR_instruction132) -PUBLIC FN_PREFIX(CryptonightR_instruction133) -PUBLIC FN_PREFIX(CryptonightR_instruction134) -PUBLIC FN_PREFIX(CryptonightR_instruction135) -PUBLIC FN_PREFIX(CryptonightR_instruction136) -PUBLIC FN_PREFIX(CryptonightR_instruction137) -PUBLIC FN_PREFIX(CryptonightR_instruction138) -PUBLIC FN_PREFIX(CryptonightR_instruction139) -PUBLIC FN_PREFIX(CryptonightR_instruction140) -PUBLIC FN_PREFIX(CryptonightR_instruction141) -PUBLIC 
FN_PREFIX(CryptonightR_instruction142) -PUBLIC FN_PREFIX(CryptonightR_instruction143) -PUBLIC FN_PREFIX(CryptonightR_instruction144) -PUBLIC FN_PREFIX(CryptonightR_instruction145) -PUBLIC FN_PREFIX(CryptonightR_instruction146) -PUBLIC FN_PREFIX(CryptonightR_instruction147) -PUBLIC FN_PREFIX(CryptonightR_instruction148) -PUBLIC FN_PREFIX(CryptonightR_instruction149) -PUBLIC FN_PREFIX(CryptonightR_instruction150) -PUBLIC FN_PREFIX(CryptonightR_instruction151) -PUBLIC FN_PREFIX(CryptonightR_instruction152) -PUBLIC FN_PREFIX(CryptonightR_instruction153) -PUBLIC FN_PREFIX(CryptonightR_instruction154) -PUBLIC FN_PREFIX(CryptonightR_instruction155) -PUBLIC FN_PREFIX(CryptonightR_instruction156) -PUBLIC FN_PREFIX(CryptonightR_instruction157) -PUBLIC FN_PREFIX(CryptonightR_instruction158) -PUBLIC FN_PREFIX(CryptonightR_instruction159) -PUBLIC FN_PREFIX(CryptonightR_instruction160) -PUBLIC FN_PREFIX(CryptonightR_instruction161) -PUBLIC FN_PREFIX(CryptonightR_instruction162) -PUBLIC FN_PREFIX(CryptonightR_instruction163) -PUBLIC FN_PREFIX(CryptonightR_instruction164) -PUBLIC FN_PREFIX(CryptonightR_instruction165) -PUBLIC FN_PREFIX(CryptonightR_instruction166) -PUBLIC FN_PREFIX(CryptonightR_instruction167) -PUBLIC FN_PREFIX(CryptonightR_instruction168) -PUBLIC FN_PREFIX(CryptonightR_instruction169) -PUBLIC FN_PREFIX(CryptonightR_instruction170) -PUBLIC FN_PREFIX(CryptonightR_instruction171) -PUBLIC FN_PREFIX(CryptonightR_instruction172) -PUBLIC FN_PREFIX(CryptonightR_instruction173) -PUBLIC FN_PREFIX(CryptonightR_instruction174) -PUBLIC FN_PREFIX(CryptonightR_instruction175) -PUBLIC FN_PREFIX(CryptonightR_instruction176) -PUBLIC FN_PREFIX(CryptonightR_instruction177) -PUBLIC FN_PREFIX(CryptonightR_instruction178) -PUBLIC FN_PREFIX(CryptonightR_instruction179) -PUBLIC FN_PREFIX(CryptonightR_instruction180) -PUBLIC FN_PREFIX(CryptonightR_instruction181) -PUBLIC FN_PREFIX(CryptonightR_instruction182) -PUBLIC FN_PREFIX(CryptonightR_instruction183) -PUBLIC 
FN_PREFIX(CryptonightR_instruction184) -PUBLIC FN_PREFIX(CryptonightR_instruction185) -PUBLIC FN_PREFIX(CryptonightR_instruction186) -PUBLIC FN_PREFIX(CryptonightR_instruction187) -PUBLIC FN_PREFIX(CryptonightR_instruction188) -PUBLIC FN_PREFIX(CryptonightR_instruction189) -PUBLIC FN_PREFIX(CryptonightR_instruction190) -PUBLIC FN_PREFIX(CryptonightR_instruction191) -PUBLIC FN_PREFIX(CryptonightR_instruction192) -PUBLIC FN_PREFIX(CryptonightR_instruction193) -PUBLIC FN_PREFIX(CryptonightR_instruction194) -PUBLIC FN_PREFIX(CryptonightR_instruction195) -PUBLIC FN_PREFIX(CryptonightR_instruction196) -PUBLIC FN_PREFIX(CryptonightR_instruction197) -PUBLIC FN_PREFIX(CryptonightR_instruction198) -PUBLIC FN_PREFIX(CryptonightR_instruction199) -PUBLIC FN_PREFIX(CryptonightR_instruction200) -PUBLIC FN_PREFIX(CryptonightR_instruction201) -PUBLIC FN_PREFIX(CryptonightR_instruction202) -PUBLIC FN_PREFIX(CryptonightR_instruction203) -PUBLIC FN_PREFIX(CryptonightR_instruction204) -PUBLIC FN_PREFIX(CryptonightR_instruction205) -PUBLIC FN_PREFIX(CryptonightR_instruction206) -PUBLIC FN_PREFIX(CryptonightR_instruction207) -PUBLIC FN_PREFIX(CryptonightR_instruction208) -PUBLIC FN_PREFIX(CryptonightR_instruction209) -PUBLIC FN_PREFIX(CryptonightR_instruction210) -PUBLIC FN_PREFIX(CryptonightR_instruction211) -PUBLIC FN_PREFIX(CryptonightR_instruction212) -PUBLIC FN_PREFIX(CryptonightR_instruction213) -PUBLIC FN_PREFIX(CryptonightR_instruction214) -PUBLIC FN_PREFIX(CryptonightR_instruction215) -PUBLIC FN_PREFIX(CryptonightR_instruction216) -PUBLIC FN_PREFIX(CryptonightR_instruction217) -PUBLIC FN_PREFIX(CryptonightR_instruction218) -PUBLIC FN_PREFIX(CryptonightR_instruction219) -PUBLIC FN_PREFIX(CryptonightR_instruction220) -PUBLIC FN_PREFIX(CryptonightR_instruction221) -PUBLIC FN_PREFIX(CryptonightR_instruction222) -PUBLIC FN_PREFIX(CryptonightR_instruction223) -PUBLIC FN_PREFIX(CryptonightR_instruction224) -PUBLIC FN_PREFIX(CryptonightR_instruction225) -PUBLIC 
FN_PREFIX(CryptonightR_instruction226) -PUBLIC FN_PREFIX(CryptonightR_instruction227) -PUBLIC FN_PREFIX(CryptonightR_instruction228) -PUBLIC FN_PREFIX(CryptonightR_instruction229) -PUBLIC FN_PREFIX(CryptonightR_instruction230) -PUBLIC FN_PREFIX(CryptonightR_instruction231) -PUBLIC FN_PREFIX(CryptonightR_instruction232) -PUBLIC FN_PREFIX(CryptonightR_instruction233) -PUBLIC FN_PREFIX(CryptonightR_instruction234) -PUBLIC FN_PREFIX(CryptonightR_instruction235) -PUBLIC FN_PREFIX(CryptonightR_instruction236) -PUBLIC FN_PREFIX(CryptonightR_instruction237) -PUBLIC FN_PREFIX(CryptonightR_instruction238) -PUBLIC FN_PREFIX(CryptonightR_instruction239) -PUBLIC FN_PREFIX(CryptonightR_instruction240) -PUBLIC FN_PREFIX(CryptonightR_instruction241) -PUBLIC FN_PREFIX(CryptonightR_instruction242) -PUBLIC FN_PREFIX(CryptonightR_instruction243) -PUBLIC FN_PREFIX(CryptonightR_instruction244) -PUBLIC FN_PREFIX(CryptonightR_instruction245) -PUBLIC FN_PREFIX(CryptonightR_instruction246) -PUBLIC FN_PREFIX(CryptonightR_instruction247) -PUBLIC FN_PREFIX(CryptonightR_instruction248) -PUBLIC FN_PREFIX(CryptonightR_instruction249) -PUBLIC FN_PREFIX(CryptonightR_instruction250) -PUBLIC FN_PREFIX(CryptonightR_instruction251) -PUBLIC FN_PREFIX(CryptonightR_instruction252) -PUBLIC FN_PREFIX(CryptonightR_instruction253) -PUBLIC FN_PREFIX(CryptonightR_instruction254) -PUBLIC FN_PREFIX(CryptonightR_instruction255) -PUBLIC FN_PREFIX(CryptonightR_instruction256) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov0) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov1) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov2) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov3) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov4) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov5) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov6) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov7) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov8) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov9) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov10) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov11) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov12) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov13) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov14) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov15) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov16) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov17) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov18) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov19) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov20) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov21) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov22) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov23) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov24) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov25) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov26) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov27) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov28) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov29) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov30) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov31) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov32) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov33) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov34) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov35) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov36) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov37) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov38) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov39) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov40) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov41) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov42) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov43) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov44) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov45) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov46) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov47) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov48) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov49) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov50) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov51) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov52) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov53) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov54) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov55) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov56) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov57) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov58) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov59) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov60) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov61) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov62) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov63) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov64) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov65) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov66) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov67) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov68) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov69) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov70) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov71) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov72) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov73) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov74) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov75) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov76) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov77) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov78) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov79) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov80) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov81) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov82) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov83) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov84) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov85) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov86) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov87) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov88) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov89) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov90) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov91) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov92) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov93) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov94) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov95) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov96) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov97) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov98) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov99) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov100) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov101) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov102) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov103) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov104) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov105) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov106) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov107) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov108) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov109) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov110) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov111) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov112) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov113) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov114) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov115) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov116) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov117) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov118) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov119) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov120) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov121) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov122) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov123) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov124) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov125) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov126) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov127) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov128) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov129) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov130) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov131) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov132) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov133) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov134) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov135) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov136) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov137) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov138) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov139) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov140) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov141) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov142) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov143) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov144) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov145) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov146) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov147) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov148) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov149) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov150) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov151) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov152) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov153) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov154) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov155) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov156) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov157) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov158) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov159) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov160) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov161) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov162) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov163) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov164) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov165) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov166) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov167) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov168) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov169) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov170) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov171) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov172) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov173) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov174) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov175) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov176) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov177) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov178) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov179) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov180) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov181) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov182) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov183) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov184) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov185) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov186) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov187) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov188) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov189) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov190) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov191) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov192) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov193) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov194) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov195) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov196) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov197) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov198) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov199) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov200) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov201) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov202) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov203) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov204) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov205) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov206) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov207) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov208) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov209) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov210) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov211) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov212) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov213) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov214) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov215) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov216) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov217) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov218) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov219) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov220) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov221) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov222) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov223) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov224) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov225) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov226) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov227) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov228) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov229) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov230) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov231) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov232) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov233) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov234) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov235) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov236) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov237) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov238) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov239) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov240) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov241) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov242) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov243) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov244) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov245) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov246) -PUBLIC 
FN_PREFIX(CryptonightR_instruction_mov247) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov248) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov249) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov250) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov251) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov252) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov253) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov254) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov255) -PUBLIC FN_PREFIX(CryptonightR_instruction_mov256) - -#include "CryptonightWOW_template.inc" -#include "CryptonightR_template.inc" - -FN_PREFIX(CryptonightR_instruction0): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction1): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction2): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction3): - add rbx, r9 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction4): - sub rbx, r9 -FN_PREFIX(CryptonightR_instruction5): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction6): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction7): - xor rbx, r9 -FN_PREFIX(CryptonightR_instruction8): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction9): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction10): - imul rsi, rbx -FN_PREFIX(CryptonightR_instruction11): - add rsi, rbx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction12): - sub rsi, rbx -FN_PREFIX(CryptonightR_instruction13): - ror esi, cl -FN_PREFIX(CryptonightR_instruction14): - rol esi, cl -FN_PREFIX(CryptonightR_instruction15): - xor rsi, rbx -FN_PREFIX(CryptonightR_instruction16): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction17): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction18): - imul rdi, rbx -FN_PREFIX(CryptonightR_instruction19): - add rdi, rbx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction20): - sub rdi, rbx -FN_PREFIX(CryptonightR_instruction21): - ror edi, cl -FN_PREFIX(CryptonightR_instruction22): - rol edi, cl -FN_PREFIX(CryptonightR_instruction23): - xor rdi, rbx -FN_PREFIX(CryptonightR_instruction24): 
- imul rbp, rbx -FN_PREFIX(CryptonightR_instruction25): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction26): - imul rbp, rbx -FN_PREFIX(CryptonightR_instruction27): - add rbp, rbx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction28): - sub rbp, rbx -FN_PREFIX(CryptonightR_instruction29): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction30): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction31): - xor rbp, rbx -FN_PREFIX(CryptonightR_instruction32): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction33): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction34): - imul rbx, rsi -FN_PREFIX(CryptonightR_instruction35): - add rbx, rsi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction36): - sub rbx, rsi -FN_PREFIX(CryptonightR_instruction37): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction38): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction39): - xor rbx, rsi -FN_PREFIX(CryptonightR_instruction40): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction41): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction42): - imul rsi, rsi -FN_PREFIX(CryptonightR_instruction43): - add rsi, r9 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction44): - sub rsi, r9 -FN_PREFIX(CryptonightR_instruction45): - ror esi, cl -FN_PREFIX(CryptonightR_instruction46): - rol esi, cl -FN_PREFIX(CryptonightR_instruction47): - xor rsi, r9 -FN_PREFIX(CryptonightR_instruction48): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction49): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction50): - imul rdi, rsi -FN_PREFIX(CryptonightR_instruction51): - add rdi, rsi - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction52): - sub rdi, rsi -FN_PREFIX(CryptonightR_instruction53): - ror edi, cl -FN_PREFIX(CryptonightR_instruction54): - rol edi, cl -FN_PREFIX(CryptonightR_instruction55): - xor rdi, rsi -FN_PREFIX(CryptonightR_instruction56): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction57): - imul rbp, rsi -FN_PREFIX(CryptonightR_instruction58): - imul rbp, rsi 
-FN_PREFIX(CryptonightR_instruction59): - add rbp, rsi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction60): - sub rbp, rsi -FN_PREFIX(CryptonightR_instruction61): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction62): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction63): - xor rbp, rsi -FN_PREFIX(CryptonightR_instruction64): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction65): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction66): - imul rbx, rdi -FN_PREFIX(CryptonightR_instruction67): - add rbx, rdi - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction68): - sub rbx, rdi -FN_PREFIX(CryptonightR_instruction69): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction70): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction71): - xor rbx, rdi -FN_PREFIX(CryptonightR_instruction72): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction73): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction74): - imul rsi, rdi -FN_PREFIX(CryptonightR_instruction75): - add rsi, rdi - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction76): - sub rsi, rdi -FN_PREFIX(CryptonightR_instruction77): - ror esi, cl -FN_PREFIX(CryptonightR_instruction78): - rol esi, cl -FN_PREFIX(CryptonightR_instruction79): - xor rsi, rdi -FN_PREFIX(CryptonightR_instruction80): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction81): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction82): - imul rdi, rdi -FN_PREFIX(CryptonightR_instruction83): - add rdi, r9 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction84): - sub rdi, r9 -FN_PREFIX(CryptonightR_instruction85): - ror edi, cl -FN_PREFIX(CryptonightR_instruction86): - rol edi, cl -FN_PREFIX(CryptonightR_instruction87): - xor rdi, r9 -FN_PREFIX(CryptonightR_instruction88): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction89): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction90): - imul rbp, rdi -FN_PREFIX(CryptonightR_instruction91): - add rbp, rdi - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction92): - sub rbp, rdi 
-FN_PREFIX(CryptonightR_instruction93): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction94): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction95): - xor rbp, rdi -FN_PREFIX(CryptonightR_instruction96): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction97): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction98): - imul rbx, rbp -FN_PREFIX(CryptonightR_instruction99): - add rbx, rbp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction100): - sub rbx, rbp -FN_PREFIX(CryptonightR_instruction101): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction102): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction103): - xor rbx, rbp -FN_PREFIX(CryptonightR_instruction104): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction105): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction106): - imul rsi, rbp -FN_PREFIX(CryptonightR_instruction107): - add rsi, rbp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction108): - sub rsi, rbp -FN_PREFIX(CryptonightR_instruction109): - ror esi, cl -FN_PREFIX(CryptonightR_instruction110): - rol esi, cl -FN_PREFIX(CryptonightR_instruction111): - xor rsi, rbp -FN_PREFIX(CryptonightR_instruction112): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction113): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction114): - imul rdi, rbp -FN_PREFIX(CryptonightR_instruction115): - add rdi, rbp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction116): - sub rdi, rbp -FN_PREFIX(CryptonightR_instruction117): - ror edi, cl -FN_PREFIX(CryptonightR_instruction118): - rol edi, cl -FN_PREFIX(CryptonightR_instruction119): - xor rdi, rbp -FN_PREFIX(CryptonightR_instruction120): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction121): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction122): - imul rbp, rbp -FN_PREFIX(CryptonightR_instruction123): - add rbp, r9 - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction124): - sub rbp, r9 -FN_PREFIX(CryptonightR_instruction125): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction126): - rol ebp, cl 
-FN_PREFIX(CryptonightR_instruction127): - xor rbp, r9 -FN_PREFIX(CryptonightR_instruction128): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction129): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction130): - imul rbx, rsp -FN_PREFIX(CryptonightR_instruction131): - add rbx, rsp - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction132): - sub rbx, rsp -FN_PREFIX(CryptonightR_instruction133): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction134): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction135): - xor rbx, rsp -FN_PREFIX(CryptonightR_instruction136): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction137): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction138): - imul rsi, rsp -FN_PREFIX(CryptonightR_instruction139): - add rsi, rsp - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction140): - sub rsi, rsp -FN_PREFIX(CryptonightR_instruction141): - ror esi, cl -FN_PREFIX(CryptonightR_instruction142): - rol esi, cl -FN_PREFIX(CryptonightR_instruction143): - xor rsi, rsp -FN_PREFIX(CryptonightR_instruction144): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction145): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction146): - imul rdi, rsp -FN_PREFIX(CryptonightR_instruction147): - add rdi, rsp - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction148): - sub rdi, rsp -FN_PREFIX(CryptonightR_instruction149): - ror edi, cl -FN_PREFIX(CryptonightR_instruction150): - rol edi, cl -FN_PREFIX(CryptonightR_instruction151): - xor rdi, rsp -FN_PREFIX(CryptonightR_instruction152): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction153): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction154): - imul rbp, rsp -FN_PREFIX(CryptonightR_instruction155): - add rbp, rsp - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction156): - sub rbp, rsp -FN_PREFIX(CryptonightR_instruction157): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction158): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction159): - xor rbp, rsp -FN_PREFIX(CryptonightR_instruction160): - imul rbx, r15 
-FN_PREFIX(CryptonightR_instruction161): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction162): - imul rbx, r15 -FN_PREFIX(CryptonightR_instruction163): - add rbx, r15 - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction164): - sub rbx, r15 -FN_PREFIX(CryptonightR_instruction165): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction166): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction167): - xor rbx, r15 -FN_PREFIX(CryptonightR_instruction168): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction169): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction170): - imul rsi, r15 -FN_PREFIX(CryptonightR_instruction171): - add rsi, r15 - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction172): - sub rsi, r15 -FN_PREFIX(CryptonightR_instruction173): - ror esi, cl -FN_PREFIX(CryptonightR_instruction174): - rol esi, cl -FN_PREFIX(CryptonightR_instruction175): - xor rsi, r15 -FN_PREFIX(CryptonightR_instruction176): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction177): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction178): - imul rdi, r15 -FN_PREFIX(CryptonightR_instruction179): - add rdi, r15 - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction180): - sub rdi, r15 -FN_PREFIX(CryptonightR_instruction181): - ror edi, cl -FN_PREFIX(CryptonightR_instruction182): - rol edi, cl -FN_PREFIX(CryptonightR_instruction183): - xor rdi, r15 -FN_PREFIX(CryptonightR_instruction184): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction185): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction186): - imul rbp, r15 -FN_PREFIX(CryptonightR_instruction187): - add rbp, r15 - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction188): - sub rbp, r15 -FN_PREFIX(CryptonightR_instruction189): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction190): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction191): - xor rbp, r15 -FN_PREFIX(CryptonightR_instruction192): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction193): - imul rbx, rax -FN_PREFIX(CryptonightR_instruction194): - imul rbx, rax 
-FN_PREFIX(CryptonightR_instruction195): - add rbx, rax - add rbx, 2147483647 -FN_PREFIX(CryptonightR_instruction196): - sub rbx, rax -FN_PREFIX(CryptonightR_instruction197): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction198): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction199): - xor rbx, rax -FN_PREFIX(CryptonightR_instruction200): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction201): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction202): - imul rsi, rax -FN_PREFIX(CryptonightR_instruction203): - add rsi, rax - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction204): - sub rsi, rax -FN_PREFIX(CryptonightR_instruction205): - ror esi, cl -FN_PREFIX(CryptonightR_instruction206): - rol esi, cl -FN_PREFIX(CryptonightR_instruction207): - xor rsi, rax -FN_PREFIX(CryptonightR_instruction208): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction209): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction210): - imul rdi, rax -FN_PREFIX(CryptonightR_instruction211): - add rdi, rax - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction212): - sub rdi, rax -FN_PREFIX(CryptonightR_instruction213): - ror edi, cl -FN_PREFIX(CryptonightR_instruction214): - rol edi, cl -FN_PREFIX(CryptonightR_instruction215): - xor rdi, rax -FN_PREFIX(CryptonightR_instruction216): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction217): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction218): - imul rbp, rax -FN_PREFIX(CryptonightR_instruction219): - add rbp, rax - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction220): - sub rbp, rax -FN_PREFIX(CryptonightR_instruction221): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction222): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction223): - xor rbp, rax -FN_PREFIX(CryptonightR_instruction224): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction225): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction226): - imul rbx, rdx -FN_PREFIX(CryptonightR_instruction227): - add rbx, rdx - add rbx, 2147483647 
-FN_PREFIX(CryptonightR_instruction228): - sub rbx, rdx -FN_PREFIX(CryptonightR_instruction229): - ror ebx, cl -FN_PREFIX(CryptonightR_instruction230): - rol ebx, cl -FN_PREFIX(CryptonightR_instruction231): - xor rbx, rdx -FN_PREFIX(CryptonightR_instruction232): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction233): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction234): - imul rsi, rdx -FN_PREFIX(CryptonightR_instruction235): - add rsi, rdx - add rsi, 2147483647 -FN_PREFIX(CryptonightR_instruction236): - sub rsi, rdx -FN_PREFIX(CryptonightR_instruction237): - ror esi, cl -FN_PREFIX(CryptonightR_instruction238): - rol esi, cl -FN_PREFIX(CryptonightR_instruction239): - xor rsi, rdx -FN_PREFIX(CryptonightR_instruction240): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction241): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction242): - imul rdi, rdx -FN_PREFIX(CryptonightR_instruction243): - add rdi, rdx - add rdi, 2147483647 -FN_PREFIX(CryptonightR_instruction244): - sub rdi, rdx -FN_PREFIX(CryptonightR_instruction245): - ror edi, cl -FN_PREFIX(CryptonightR_instruction246): - rol edi, cl -FN_PREFIX(CryptonightR_instruction247): - xor rdi, rdx -FN_PREFIX(CryptonightR_instruction248): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction249): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction250): - imul rbp, rdx -FN_PREFIX(CryptonightR_instruction251): - add rbp, rdx - add rbp, 2147483647 -FN_PREFIX(CryptonightR_instruction252): - sub rbp, rdx -FN_PREFIX(CryptonightR_instruction253): - ror ebp, cl -FN_PREFIX(CryptonightR_instruction254): - rol ebp, cl -FN_PREFIX(CryptonightR_instruction255): - xor rbp, rdx -FN_PREFIX(CryptonightR_instruction256): - imul rbx, rbx -FN_PREFIX(CryptonightR_instruction_mov0): - -FN_PREFIX(CryptonightR_instruction_mov1): - -FN_PREFIX(CryptonightR_instruction_mov2): - -FN_PREFIX(CryptonightR_instruction_mov3): - -FN_PREFIX(CryptonightR_instruction_mov4): - -FN_PREFIX(CryptonightR_instruction_mov5): - mov rcx, rbx 
-FN_PREFIX(CryptonightR_instruction_mov6): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov7): - -FN_PREFIX(CryptonightR_instruction_mov8): - -FN_PREFIX(CryptonightR_instruction_mov9): - -FN_PREFIX(CryptonightR_instruction_mov10): - -FN_PREFIX(CryptonightR_instruction_mov11): - -FN_PREFIX(CryptonightR_instruction_mov12): - -FN_PREFIX(CryptonightR_instruction_mov13): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov14): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov15): - -FN_PREFIX(CryptonightR_instruction_mov16): - -FN_PREFIX(CryptonightR_instruction_mov17): - -FN_PREFIX(CryptonightR_instruction_mov18): - -FN_PREFIX(CryptonightR_instruction_mov19): - -FN_PREFIX(CryptonightR_instruction_mov20): - -FN_PREFIX(CryptonightR_instruction_mov21): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov22): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov23): - -FN_PREFIX(CryptonightR_instruction_mov24): - -FN_PREFIX(CryptonightR_instruction_mov25): - -FN_PREFIX(CryptonightR_instruction_mov26): - -FN_PREFIX(CryptonightR_instruction_mov27): - -FN_PREFIX(CryptonightR_instruction_mov28): - -FN_PREFIX(CryptonightR_instruction_mov29): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov30): - mov rcx, rbx -FN_PREFIX(CryptonightR_instruction_mov31): - -FN_PREFIX(CryptonightR_instruction_mov32): - -FN_PREFIX(CryptonightR_instruction_mov33): - -FN_PREFIX(CryptonightR_instruction_mov34): - -FN_PREFIX(CryptonightR_instruction_mov35): - -FN_PREFIX(CryptonightR_instruction_mov36): - -FN_PREFIX(CryptonightR_instruction_mov37): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov38): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov39): - -FN_PREFIX(CryptonightR_instruction_mov40): - -FN_PREFIX(CryptonightR_instruction_mov41): - -FN_PREFIX(CryptonightR_instruction_mov42): - -FN_PREFIX(CryptonightR_instruction_mov43): - -FN_PREFIX(CryptonightR_instruction_mov44): - -FN_PREFIX(CryptonightR_instruction_mov45): - mov rcx, rsi 
-FN_PREFIX(CryptonightR_instruction_mov46): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov47): - -FN_PREFIX(CryptonightR_instruction_mov48): - -FN_PREFIX(CryptonightR_instruction_mov49): - -FN_PREFIX(CryptonightR_instruction_mov50): - -FN_PREFIX(CryptonightR_instruction_mov51): - -FN_PREFIX(CryptonightR_instruction_mov52): - -FN_PREFIX(CryptonightR_instruction_mov53): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov54): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov55): - -FN_PREFIX(CryptonightR_instruction_mov56): - -FN_PREFIX(CryptonightR_instruction_mov57): - -FN_PREFIX(CryptonightR_instruction_mov58): - -FN_PREFIX(CryptonightR_instruction_mov59): - -FN_PREFIX(CryptonightR_instruction_mov60): - -FN_PREFIX(CryptonightR_instruction_mov61): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov62): - mov rcx, rsi -FN_PREFIX(CryptonightR_instruction_mov63): - -FN_PREFIX(CryptonightR_instruction_mov64): - -FN_PREFIX(CryptonightR_instruction_mov65): - -FN_PREFIX(CryptonightR_instruction_mov66): - -FN_PREFIX(CryptonightR_instruction_mov67): - -FN_PREFIX(CryptonightR_instruction_mov68): - -FN_PREFIX(CryptonightR_instruction_mov69): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov70): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov71): - -FN_PREFIX(CryptonightR_instruction_mov72): - -FN_PREFIX(CryptonightR_instruction_mov73): - -FN_PREFIX(CryptonightR_instruction_mov74): - -FN_PREFIX(CryptonightR_instruction_mov75): - -FN_PREFIX(CryptonightR_instruction_mov76): - -FN_PREFIX(CryptonightR_instruction_mov77): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov78): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov79): - -FN_PREFIX(CryptonightR_instruction_mov80): - -FN_PREFIX(CryptonightR_instruction_mov81): - -FN_PREFIX(CryptonightR_instruction_mov82): - -FN_PREFIX(CryptonightR_instruction_mov83): - -FN_PREFIX(CryptonightR_instruction_mov84): - -FN_PREFIX(CryptonightR_instruction_mov85): - mov rcx, rdi 
-FN_PREFIX(CryptonightR_instruction_mov86): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov87): - -FN_PREFIX(CryptonightR_instruction_mov88): - -FN_PREFIX(CryptonightR_instruction_mov89): - -FN_PREFIX(CryptonightR_instruction_mov90): - -FN_PREFIX(CryptonightR_instruction_mov91): - -FN_PREFIX(CryptonightR_instruction_mov92): - -FN_PREFIX(CryptonightR_instruction_mov93): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov94): - mov rcx, rdi -FN_PREFIX(CryptonightR_instruction_mov95): - -FN_PREFIX(CryptonightR_instruction_mov96): - -FN_PREFIX(CryptonightR_instruction_mov97): - -FN_PREFIX(CryptonightR_instruction_mov98): - -FN_PREFIX(CryptonightR_instruction_mov99): - -FN_PREFIX(CryptonightR_instruction_mov100): - -FN_PREFIX(CryptonightR_instruction_mov101): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov102): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov103): - -FN_PREFIX(CryptonightR_instruction_mov104): - -FN_PREFIX(CryptonightR_instruction_mov105): - -FN_PREFIX(CryptonightR_instruction_mov106): - -FN_PREFIX(CryptonightR_instruction_mov107): - -FN_PREFIX(CryptonightR_instruction_mov108): - -FN_PREFIX(CryptonightR_instruction_mov109): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov110): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov111): - -FN_PREFIX(CryptonightR_instruction_mov112): - -FN_PREFIX(CryptonightR_instruction_mov113): - -FN_PREFIX(CryptonightR_instruction_mov114): - -FN_PREFIX(CryptonightR_instruction_mov115): - -FN_PREFIX(CryptonightR_instruction_mov116): - -FN_PREFIX(CryptonightR_instruction_mov117): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov118): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov119): - -FN_PREFIX(CryptonightR_instruction_mov120): - -FN_PREFIX(CryptonightR_instruction_mov121): - -FN_PREFIX(CryptonightR_instruction_mov122): - -FN_PREFIX(CryptonightR_instruction_mov123): - -FN_PREFIX(CryptonightR_instruction_mov124): - -FN_PREFIX(CryptonightR_instruction_mov125): - mov rcx, rbp 
-FN_PREFIX(CryptonightR_instruction_mov126): - mov rcx, rbp -FN_PREFIX(CryptonightR_instruction_mov127): - -FN_PREFIX(CryptonightR_instruction_mov128): - -FN_PREFIX(CryptonightR_instruction_mov129): - -FN_PREFIX(CryptonightR_instruction_mov130): - -FN_PREFIX(CryptonightR_instruction_mov131): - -FN_PREFIX(CryptonightR_instruction_mov132): - -FN_PREFIX(CryptonightR_instruction_mov133): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov134): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov135): - -FN_PREFIX(CryptonightR_instruction_mov136): - -FN_PREFIX(CryptonightR_instruction_mov137): - -FN_PREFIX(CryptonightR_instruction_mov138): - -FN_PREFIX(CryptonightR_instruction_mov139): - -FN_PREFIX(CryptonightR_instruction_mov140): - -FN_PREFIX(CryptonightR_instruction_mov141): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov142): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov143): - -FN_PREFIX(CryptonightR_instruction_mov144): - -FN_PREFIX(CryptonightR_instruction_mov145): - -FN_PREFIX(CryptonightR_instruction_mov146): - -FN_PREFIX(CryptonightR_instruction_mov147): - -FN_PREFIX(CryptonightR_instruction_mov148): - -FN_PREFIX(CryptonightR_instruction_mov149): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov150): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov151): - -FN_PREFIX(CryptonightR_instruction_mov152): - -FN_PREFIX(CryptonightR_instruction_mov153): - -FN_PREFIX(CryptonightR_instruction_mov154): - -FN_PREFIX(CryptonightR_instruction_mov155): - -FN_PREFIX(CryptonightR_instruction_mov156): - -FN_PREFIX(CryptonightR_instruction_mov157): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov158): - mov rcx, rsp -FN_PREFIX(CryptonightR_instruction_mov159): - -FN_PREFIX(CryptonightR_instruction_mov160): - -FN_PREFIX(CryptonightR_instruction_mov161): - -FN_PREFIX(CryptonightR_instruction_mov162): - -FN_PREFIX(CryptonightR_instruction_mov163): - -FN_PREFIX(CryptonightR_instruction_mov164): - -FN_PREFIX(CryptonightR_instruction_mov165): - 
mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov166): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov167): - -FN_PREFIX(CryptonightR_instruction_mov168): - -FN_PREFIX(CryptonightR_instruction_mov169): - -FN_PREFIX(CryptonightR_instruction_mov170): - -FN_PREFIX(CryptonightR_instruction_mov171): - -FN_PREFIX(CryptonightR_instruction_mov172): - -FN_PREFIX(CryptonightR_instruction_mov173): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov174): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov175): - -FN_PREFIX(CryptonightR_instruction_mov176): - -FN_PREFIX(CryptonightR_instruction_mov177): - -FN_PREFIX(CryptonightR_instruction_mov178): - -FN_PREFIX(CryptonightR_instruction_mov179): - -FN_PREFIX(CryptonightR_instruction_mov180): - -FN_PREFIX(CryptonightR_instruction_mov181): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov182): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov183): - -FN_PREFIX(CryptonightR_instruction_mov184): - -FN_PREFIX(CryptonightR_instruction_mov185): - -FN_PREFIX(CryptonightR_instruction_mov186): - -FN_PREFIX(CryptonightR_instruction_mov187): - -FN_PREFIX(CryptonightR_instruction_mov188): - -FN_PREFIX(CryptonightR_instruction_mov189): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov190): - mov rcx, r15 -FN_PREFIX(CryptonightR_instruction_mov191): - -FN_PREFIX(CryptonightR_instruction_mov192): - -FN_PREFIX(CryptonightR_instruction_mov193): - -FN_PREFIX(CryptonightR_instruction_mov194): - -FN_PREFIX(CryptonightR_instruction_mov195): - -FN_PREFIX(CryptonightR_instruction_mov196): - -FN_PREFIX(CryptonightR_instruction_mov197): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov198): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov199): - -FN_PREFIX(CryptonightR_instruction_mov200): - -FN_PREFIX(CryptonightR_instruction_mov201): - -FN_PREFIX(CryptonightR_instruction_mov202): - -FN_PREFIX(CryptonightR_instruction_mov203): - -FN_PREFIX(CryptonightR_instruction_mov204): - 
-FN_PREFIX(CryptonightR_instruction_mov205): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov206): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov207): - -FN_PREFIX(CryptonightR_instruction_mov208): - -FN_PREFIX(CryptonightR_instruction_mov209): - -FN_PREFIX(CryptonightR_instruction_mov210): - -FN_PREFIX(CryptonightR_instruction_mov211): - -FN_PREFIX(CryptonightR_instruction_mov212): - -FN_PREFIX(CryptonightR_instruction_mov213): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov214): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov215): - -FN_PREFIX(CryptonightR_instruction_mov216): - -FN_PREFIX(CryptonightR_instruction_mov217): - -FN_PREFIX(CryptonightR_instruction_mov218): - -FN_PREFIX(CryptonightR_instruction_mov219): - -FN_PREFIX(CryptonightR_instruction_mov220): - -FN_PREFIX(CryptonightR_instruction_mov221): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov222): - mov rcx, rax -FN_PREFIX(CryptonightR_instruction_mov223): - -FN_PREFIX(CryptonightR_instruction_mov224): - -FN_PREFIX(CryptonightR_instruction_mov225): - -FN_PREFIX(CryptonightR_instruction_mov226): - -FN_PREFIX(CryptonightR_instruction_mov227): - -FN_PREFIX(CryptonightR_instruction_mov228): - -FN_PREFIX(CryptonightR_instruction_mov229): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov230): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov231): - -FN_PREFIX(CryptonightR_instruction_mov232): - -FN_PREFIX(CryptonightR_instruction_mov233): - -FN_PREFIX(CryptonightR_instruction_mov234): - -FN_PREFIX(CryptonightR_instruction_mov235): - -FN_PREFIX(CryptonightR_instruction_mov236): - -FN_PREFIX(CryptonightR_instruction_mov237): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov238): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov239): - -FN_PREFIX(CryptonightR_instruction_mov240): - -FN_PREFIX(CryptonightR_instruction_mov241): - -FN_PREFIX(CryptonightR_instruction_mov242): - -FN_PREFIX(CryptonightR_instruction_mov243): - 
-FN_PREFIX(CryptonightR_instruction_mov244): - -FN_PREFIX(CryptonightR_instruction_mov245): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov246): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov247): - -FN_PREFIX(CryptonightR_instruction_mov248): - -FN_PREFIX(CryptonightR_instruction_mov249): - -FN_PREFIX(CryptonightR_instruction_mov250): - -FN_PREFIX(CryptonightR_instruction_mov251): - -FN_PREFIX(CryptonightR_instruction_mov252): - -FN_PREFIX(CryptonightR_instruction_mov253): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov254): - mov rcx, rdx -FN_PREFIX(CryptonightR_instruction_mov255): - -FN_PREFIX(CryptonightR_instruction_mov256): diff --git a/src/crypto/asm/win64/CryptonightR_template.asm b/src/crypto/asm/win64/CryptonightR_template.asm index 25b72c3c..250eca3d 100644 --- a/src/crypto/asm/win64/CryptonightR_template.asm +++ b/src/crypto/asm/win64/CryptonightR_template.asm @@ -518,6 +518,8 @@ PUBLIC CryptonightR_instruction_mov256 INCLUDE CryptonightWOW_template_win.inc INCLUDE CryptonightR_template_win.inc +INCLUDE CryptonightWOW_soft_aes_template_win.inc +INCLUDE CryptonightR_soft_aes_template_win.inc CryptonightR_instruction0: imul rbx, rbx diff --git a/src/crypto/asm/win64/CryptonightR_template.h b/src/crypto/asm/win64/CryptonightR_template.h deleted file mode 100644 index c2054705..00000000 --- a/src/crypto/asm/win64/CryptonightR_template.h +++ /dev/null @@ -1,1063 +0,0 @@ -// Auto-generated file, do not edit - -extern "C" -{ - void CryptonightWOW_template_part1(); - void CryptonightWOW_template_mainloop(); - void CryptonightWOW_template_part2(); - void CryptonightWOW_template_part3(); - void CryptonightWOW_template_end(); - void CryptonightWOW_template_double_part1(); - void CryptonightWOW_template_double_mainloop(); - void CryptonightWOW_template_double_part2(); - void CryptonightWOW_template_double_part3(); - void CryptonightWOW_template_double_part4(); - void CryptonightWOW_template_double_end(); - - void 
CryptonightR_template_part1(); - void CryptonightR_template_mainloop(); - void CryptonightR_template_part2(); - void CryptonightR_template_part3(); - void CryptonightR_template_end(); - void CryptonightR_template_double_part1(); - void CryptonightR_template_double_mainloop(); - void CryptonightR_template_double_part2(); - void CryptonightR_template_double_part3(); - void CryptonightR_template_double_part4(); - void CryptonightR_template_double_end(); - - void CryptonightR_instruction0(); - void CryptonightR_instruction1(); - void CryptonightR_instruction2(); - void CryptonightR_instruction3(); - void CryptonightR_instruction4(); - void CryptonightR_instruction5(); - void CryptonightR_instruction6(); - void CryptonightR_instruction7(); - void CryptonightR_instruction8(); - void CryptonightR_instruction9(); - void CryptonightR_instruction10(); - void CryptonightR_instruction11(); - void CryptonightR_instruction12(); - void CryptonightR_instruction13(); - void CryptonightR_instruction14(); - void CryptonightR_instruction15(); - void CryptonightR_instruction16(); - void CryptonightR_instruction17(); - void CryptonightR_instruction18(); - void CryptonightR_instruction19(); - void CryptonightR_instruction20(); - void CryptonightR_instruction21(); - void CryptonightR_instruction22(); - void CryptonightR_instruction23(); - void CryptonightR_instruction24(); - void CryptonightR_instruction25(); - void CryptonightR_instruction26(); - void CryptonightR_instruction27(); - void CryptonightR_instruction28(); - void CryptonightR_instruction29(); - void CryptonightR_instruction30(); - void CryptonightR_instruction31(); - void CryptonightR_instruction32(); - void CryptonightR_instruction33(); - void CryptonightR_instruction34(); - void CryptonightR_instruction35(); - void CryptonightR_instruction36(); - void CryptonightR_instruction37(); - void CryptonightR_instruction38(); - void CryptonightR_instruction39(); - void CryptonightR_instruction40(); - void 
CryptonightR_instruction41(); - void CryptonightR_instruction42(); - void CryptonightR_instruction43(); - void CryptonightR_instruction44(); - void CryptonightR_instruction45(); - void CryptonightR_instruction46(); - void CryptonightR_instruction47(); - void CryptonightR_instruction48(); - void CryptonightR_instruction49(); - void CryptonightR_instruction50(); - void CryptonightR_instruction51(); - void CryptonightR_instruction52(); - void CryptonightR_instruction53(); - void CryptonightR_instruction54(); - void CryptonightR_instruction55(); - void CryptonightR_instruction56(); - void CryptonightR_instruction57(); - void CryptonightR_instruction58(); - void CryptonightR_instruction59(); - void CryptonightR_instruction60(); - void CryptonightR_instruction61(); - void CryptonightR_instruction62(); - void CryptonightR_instruction63(); - void CryptonightR_instruction64(); - void CryptonightR_instruction65(); - void CryptonightR_instruction66(); - void CryptonightR_instruction67(); - void CryptonightR_instruction68(); - void CryptonightR_instruction69(); - void CryptonightR_instruction70(); - void CryptonightR_instruction71(); - void CryptonightR_instruction72(); - void CryptonightR_instruction73(); - void CryptonightR_instruction74(); - void CryptonightR_instruction75(); - void CryptonightR_instruction76(); - void CryptonightR_instruction77(); - void CryptonightR_instruction78(); - void CryptonightR_instruction79(); - void CryptonightR_instruction80(); - void CryptonightR_instruction81(); - void CryptonightR_instruction82(); - void CryptonightR_instruction83(); - void CryptonightR_instruction84(); - void CryptonightR_instruction85(); - void CryptonightR_instruction86(); - void CryptonightR_instruction87(); - void CryptonightR_instruction88(); - void CryptonightR_instruction89(); - void CryptonightR_instruction90(); - void CryptonightR_instruction91(); - void CryptonightR_instruction92(); - void CryptonightR_instruction93(); - void CryptonightR_instruction94(); - void 
CryptonightR_instruction95(); - void CryptonightR_instruction96(); - void CryptonightR_instruction97(); - void CryptonightR_instruction98(); - void CryptonightR_instruction99(); - void CryptonightR_instruction100(); - void CryptonightR_instruction101(); - void CryptonightR_instruction102(); - void CryptonightR_instruction103(); - void CryptonightR_instruction104(); - void CryptonightR_instruction105(); - void CryptonightR_instruction106(); - void CryptonightR_instruction107(); - void CryptonightR_instruction108(); - void CryptonightR_instruction109(); - void CryptonightR_instruction110(); - void CryptonightR_instruction111(); - void CryptonightR_instruction112(); - void CryptonightR_instruction113(); - void CryptonightR_instruction114(); - void CryptonightR_instruction115(); - void CryptonightR_instruction116(); - void CryptonightR_instruction117(); - void CryptonightR_instruction118(); - void CryptonightR_instruction119(); - void CryptonightR_instruction120(); - void CryptonightR_instruction121(); - void CryptonightR_instruction122(); - void CryptonightR_instruction123(); - void CryptonightR_instruction124(); - void CryptonightR_instruction125(); - void CryptonightR_instruction126(); - void CryptonightR_instruction127(); - void CryptonightR_instruction128(); - void CryptonightR_instruction129(); - void CryptonightR_instruction130(); - void CryptonightR_instruction131(); - void CryptonightR_instruction132(); - void CryptonightR_instruction133(); - void CryptonightR_instruction134(); - void CryptonightR_instruction135(); - void CryptonightR_instruction136(); - void CryptonightR_instruction137(); - void CryptonightR_instruction138(); - void CryptonightR_instruction139(); - void CryptonightR_instruction140(); - void CryptonightR_instruction141(); - void CryptonightR_instruction142(); - void CryptonightR_instruction143(); - void CryptonightR_instruction144(); - void CryptonightR_instruction145(); - void CryptonightR_instruction146(); - void 
CryptonightR_instruction147(); - void CryptonightR_instruction148(); - void CryptonightR_instruction149(); - void CryptonightR_instruction150(); - void CryptonightR_instruction151(); - void CryptonightR_instruction152(); - void CryptonightR_instruction153(); - void CryptonightR_instruction154(); - void CryptonightR_instruction155(); - void CryptonightR_instruction156(); - void CryptonightR_instruction157(); - void CryptonightR_instruction158(); - void CryptonightR_instruction159(); - void CryptonightR_instruction160(); - void CryptonightR_instruction161(); - void CryptonightR_instruction162(); - void CryptonightR_instruction163(); - void CryptonightR_instruction164(); - void CryptonightR_instruction165(); - void CryptonightR_instruction166(); - void CryptonightR_instruction167(); - void CryptonightR_instruction168(); - void CryptonightR_instruction169(); - void CryptonightR_instruction170(); - void CryptonightR_instruction171(); - void CryptonightR_instruction172(); - void CryptonightR_instruction173(); - void CryptonightR_instruction174(); - void CryptonightR_instruction175(); - void CryptonightR_instruction176(); - void CryptonightR_instruction177(); - void CryptonightR_instruction178(); - void CryptonightR_instruction179(); - void CryptonightR_instruction180(); - void CryptonightR_instruction181(); - void CryptonightR_instruction182(); - void CryptonightR_instruction183(); - void CryptonightR_instruction184(); - void CryptonightR_instruction185(); - void CryptonightR_instruction186(); - void CryptonightR_instruction187(); - void CryptonightR_instruction188(); - void CryptonightR_instruction189(); - void CryptonightR_instruction190(); - void CryptonightR_instruction191(); - void CryptonightR_instruction192(); - void CryptonightR_instruction193(); - void CryptonightR_instruction194(); - void CryptonightR_instruction195(); - void CryptonightR_instruction196(); - void CryptonightR_instruction197(); - void CryptonightR_instruction198(); - void 
CryptonightR_instruction199(); - void CryptonightR_instruction200(); - void CryptonightR_instruction201(); - void CryptonightR_instruction202(); - void CryptonightR_instruction203(); - void CryptonightR_instruction204(); - void CryptonightR_instruction205(); - void CryptonightR_instruction206(); - void CryptonightR_instruction207(); - void CryptonightR_instruction208(); - void CryptonightR_instruction209(); - void CryptonightR_instruction210(); - void CryptonightR_instruction211(); - void CryptonightR_instruction212(); - void CryptonightR_instruction213(); - void CryptonightR_instruction214(); - void CryptonightR_instruction215(); - void CryptonightR_instruction216(); - void CryptonightR_instruction217(); - void CryptonightR_instruction218(); - void CryptonightR_instruction219(); - void CryptonightR_instruction220(); - void CryptonightR_instruction221(); - void CryptonightR_instruction222(); - void CryptonightR_instruction223(); - void CryptonightR_instruction224(); - void CryptonightR_instruction225(); - void CryptonightR_instruction226(); - void CryptonightR_instruction227(); - void CryptonightR_instruction228(); - void CryptonightR_instruction229(); - void CryptonightR_instruction230(); - void CryptonightR_instruction231(); - void CryptonightR_instruction232(); - void CryptonightR_instruction233(); - void CryptonightR_instruction234(); - void CryptonightR_instruction235(); - void CryptonightR_instruction236(); - void CryptonightR_instruction237(); - void CryptonightR_instruction238(); - void CryptonightR_instruction239(); - void CryptonightR_instruction240(); - void CryptonightR_instruction241(); - void CryptonightR_instruction242(); - void CryptonightR_instruction243(); - void CryptonightR_instruction244(); - void CryptonightR_instruction245(); - void CryptonightR_instruction246(); - void CryptonightR_instruction247(); - void CryptonightR_instruction248(); - void CryptonightR_instruction249(); - void CryptonightR_instruction250(); - void 
CryptonightR_instruction251(); - void CryptonightR_instruction252(); - void CryptonightR_instruction253(); - void CryptonightR_instruction254(); - void CryptonightR_instruction255(); - void CryptonightR_instruction256(); - void CryptonightR_instruction_mov0(); - void CryptonightR_instruction_mov1(); - void CryptonightR_instruction_mov2(); - void CryptonightR_instruction_mov3(); - void CryptonightR_instruction_mov4(); - void CryptonightR_instruction_mov5(); - void CryptonightR_instruction_mov6(); - void CryptonightR_instruction_mov7(); - void CryptonightR_instruction_mov8(); - void CryptonightR_instruction_mov9(); - void CryptonightR_instruction_mov10(); - void CryptonightR_instruction_mov11(); - void CryptonightR_instruction_mov12(); - void CryptonightR_instruction_mov13(); - void CryptonightR_instruction_mov14(); - void CryptonightR_instruction_mov15(); - void CryptonightR_instruction_mov16(); - void CryptonightR_instruction_mov17(); - void CryptonightR_instruction_mov18(); - void CryptonightR_instruction_mov19(); - void CryptonightR_instruction_mov20(); - void CryptonightR_instruction_mov21(); - void CryptonightR_instruction_mov22(); - void CryptonightR_instruction_mov23(); - void CryptonightR_instruction_mov24(); - void CryptonightR_instruction_mov25(); - void CryptonightR_instruction_mov26(); - void CryptonightR_instruction_mov27(); - void CryptonightR_instruction_mov28(); - void CryptonightR_instruction_mov29(); - void CryptonightR_instruction_mov30(); - void CryptonightR_instruction_mov31(); - void CryptonightR_instruction_mov32(); - void CryptonightR_instruction_mov33(); - void CryptonightR_instruction_mov34(); - void CryptonightR_instruction_mov35(); - void CryptonightR_instruction_mov36(); - void CryptonightR_instruction_mov37(); - void CryptonightR_instruction_mov38(); - void CryptonightR_instruction_mov39(); - void CryptonightR_instruction_mov40(); - void CryptonightR_instruction_mov41(); - void CryptonightR_instruction_mov42(); - void 
CryptonightR_instruction_mov43(); - void CryptonightR_instruction_mov44(); - void CryptonightR_instruction_mov45(); - void CryptonightR_instruction_mov46(); - void CryptonightR_instruction_mov47(); - void CryptonightR_instruction_mov48(); - void CryptonightR_instruction_mov49(); - void CryptonightR_instruction_mov50(); - void CryptonightR_instruction_mov51(); - void CryptonightR_instruction_mov52(); - void CryptonightR_instruction_mov53(); - void CryptonightR_instruction_mov54(); - void CryptonightR_instruction_mov55(); - void CryptonightR_instruction_mov56(); - void CryptonightR_instruction_mov57(); - void CryptonightR_instruction_mov58(); - void CryptonightR_instruction_mov59(); - void CryptonightR_instruction_mov60(); - void CryptonightR_instruction_mov61(); - void CryptonightR_instruction_mov62(); - void CryptonightR_instruction_mov63(); - void CryptonightR_instruction_mov64(); - void CryptonightR_instruction_mov65(); - void CryptonightR_instruction_mov66(); - void CryptonightR_instruction_mov67(); - void CryptonightR_instruction_mov68(); - void CryptonightR_instruction_mov69(); - void CryptonightR_instruction_mov70(); - void CryptonightR_instruction_mov71(); - void CryptonightR_instruction_mov72(); - void CryptonightR_instruction_mov73(); - void CryptonightR_instruction_mov74(); - void CryptonightR_instruction_mov75(); - void CryptonightR_instruction_mov76(); - void CryptonightR_instruction_mov77(); - void CryptonightR_instruction_mov78(); - void CryptonightR_instruction_mov79(); - void CryptonightR_instruction_mov80(); - void CryptonightR_instruction_mov81(); - void CryptonightR_instruction_mov82(); - void CryptonightR_instruction_mov83(); - void CryptonightR_instruction_mov84(); - void CryptonightR_instruction_mov85(); - void CryptonightR_instruction_mov86(); - void CryptonightR_instruction_mov87(); - void CryptonightR_instruction_mov88(); - void CryptonightR_instruction_mov89(); - void CryptonightR_instruction_mov90(); - void 
CryptonightR_instruction_mov91(); - void CryptonightR_instruction_mov92(); - void CryptonightR_instruction_mov93(); - void CryptonightR_instruction_mov94(); - void CryptonightR_instruction_mov95(); - void CryptonightR_instruction_mov96(); - void CryptonightR_instruction_mov97(); - void CryptonightR_instruction_mov98(); - void CryptonightR_instruction_mov99(); - void CryptonightR_instruction_mov100(); - void CryptonightR_instruction_mov101(); - void CryptonightR_instruction_mov102(); - void CryptonightR_instruction_mov103(); - void CryptonightR_instruction_mov104(); - void CryptonightR_instruction_mov105(); - void CryptonightR_instruction_mov106(); - void CryptonightR_instruction_mov107(); - void CryptonightR_instruction_mov108(); - void CryptonightR_instruction_mov109(); - void CryptonightR_instruction_mov110(); - void CryptonightR_instruction_mov111(); - void CryptonightR_instruction_mov112(); - void CryptonightR_instruction_mov113(); - void CryptonightR_instruction_mov114(); - void CryptonightR_instruction_mov115(); - void CryptonightR_instruction_mov116(); - void CryptonightR_instruction_mov117(); - void CryptonightR_instruction_mov118(); - void CryptonightR_instruction_mov119(); - void CryptonightR_instruction_mov120(); - void CryptonightR_instruction_mov121(); - void CryptonightR_instruction_mov122(); - void CryptonightR_instruction_mov123(); - void CryptonightR_instruction_mov124(); - void CryptonightR_instruction_mov125(); - void CryptonightR_instruction_mov126(); - void CryptonightR_instruction_mov127(); - void CryptonightR_instruction_mov128(); - void CryptonightR_instruction_mov129(); - void CryptonightR_instruction_mov130(); - void CryptonightR_instruction_mov131(); - void CryptonightR_instruction_mov132(); - void CryptonightR_instruction_mov133(); - void CryptonightR_instruction_mov134(); - void CryptonightR_instruction_mov135(); - void CryptonightR_instruction_mov136(); - void CryptonightR_instruction_mov137(); - void CryptonightR_instruction_mov138(); 
- void CryptonightR_instruction_mov139(); - void CryptonightR_instruction_mov140(); - void CryptonightR_instruction_mov141(); - void CryptonightR_instruction_mov142(); - void CryptonightR_instruction_mov143(); - void CryptonightR_instruction_mov144(); - void CryptonightR_instruction_mov145(); - void CryptonightR_instruction_mov146(); - void CryptonightR_instruction_mov147(); - void CryptonightR_instruction_mov148(); - void CryptonightR_instruction_mov149(); - void CryptonightR_instruction_mov150(); - void CryptonightR_instruction_mov151(); - void CryptonightR_instruction_mov152(); - void CryptonightR_instruction_mov153(); - void CryptonightR_instruction_mov154(); - void CryptonightR_instruction_mov155(); - void CryptonightR_instruction_mov156(); - void CryptonightR_instruction_mov157(); - void CryptonightR_instruction_mov158(); - void CryptonightR_instruction_mov159(); - void CryptonightR_instruction_mov160(); - void CryptonightR_instruction_mov161(); - void CryptonightR_instruction_mov162(); - void CryptonightR_instruction_mov163(); - void CryptonightR_instruction_mov164(); - void CryptonightR_instruction_mov165(); - void CryptonightR_instruction_mov166(); - void CryptonightR_instruction_mov167(); - void CryptonightR_instruction_mov168(); - void CryptonightR_instruction_mov169(); - void CryptonightR_instruction_mov170(); - void CryptonightR_instruction_mov171(); - void CryptonightR_instruction_mov172(); - void CryptonightR_instruction_mov173(); - void CryptonightR_instruction_mov174(); - void CryptonightR_instruction_mov175(); - void CryptonightR_instruction_mov176(); - void CryptonightR_instruction_mov177(); - void CryptonightR_instruction_mov178(); - void CryptonightR_instruction_mov179(); - void CryptonightR_instruction_mov180(); - void CryptonightR_instruction_mov181(); - void CryptonightR_instruction_mov182(); - void CryptonightR_instruction_mov183(); - void CryptonightR_instruction_mov184(); - void CryptonightR_instruction_mov185(); - void 
CryptonightR_instruction_mov186(); - void CryptonightR_instruction_mov187(); - void CryptonightR_instruction_mov188(); - void CryptonightR_instruction_mov189(); - void CryptonightR_instruction_mov190(); - void CryptonightR_instruction_mov191(); - void CryptonightR_instruction_mov192(); - void CryptonightR_instruction_mov193(); - void CryptonightR_instruction_mov194(); - void CryptonightR_instruction_mov195(); - void CryptonightR_instruction_mov196(); - void CryptonightR_instruction_mov197(); - void CryptonightR_instruction_mov198(); - void CryptonightR_instruction_mov199(); - void CryptonightR_instruction_mov200(); - void CryptonightR_instruction_mov201(); - void CryptonightR_instruction_mov202(); - void CryptonightR_instruction_mov203(); - void CryptonightR_instruction_mov204(); - void CryptonightR_instruction_mov205(); - void CryptonightR_instruction_mov206(); - void CryptonightR_instruction_mov207(); - void CryptonightR_instruction_mov208(); - void CryptonightR_instruction_mov209(); - void CryptonightR_instruction_mov210(); - void CryptonightR_instruction_mov211(); - void CryptonightR_instruction_mov212(); - void CryptonightR_instruction_mov213(); - void CryptonightR_instruction_mov214(); - void CryptonightR_instruction_mov215(); - void CryptonightR_instruction_mov216(); - void CryptonightR_instruction_mov217(); - void CryptonightR_instruction_mov218(); - void CryptonightR_instruction_mov219(); - void CryptonightR_instruction_mov220(); - void CryptonightR_instruction_mov221(); - void CryptonightR_instruction_mov222(); - void CryptonightR_instruction_mov223(); - void CryptonightR_instruction_mov224(); - void CryptonightR_instruction_mov225(); - void CryptonightR_instruction_mov226(); - void CryptonightR_instruction_mov227(); - void CryptonightR_instruction_mov228(); - void CryptonightR_instruction_mov229(); - void CryptonightR_instruction_mov230(); - void CryptonightR_instruction_mov231(); - void CryptonightR_instruction_mov232(); - void 
CryptonightR_instruction_mov233(); - void CryptonightR_instruction_mov234(); - void CryptonightR_instruction_mov235(); - void CryptonightR_instruction_mov236(); - void CryptonightR_instruction_mov237(); - void CryptonightR_instruction_mov238(); - void CryptonightR_instruction_mov239(); - void CryptonightR_instruction_mov240(); - void CryptonightR_instruction_mov241(); - void CryptonightR_instruction_mov242(); - void CryptonightR_instruction_mov243(); - void CryptonightR_instruction_mov244(); - void CryptonightR_instruction_mov245(); - void CryptonightR_instruction_mov246(); - void CryptonightR_instruction_mov247(); - void CryptonightR_instruction_mov248(); - void CryptonightR_instruction_mov249(); - void CryptonightR_instruction_mov250(); - void CryptonightR_instruction_mov251(); - void CryptonightR_instruction_mov252(); - void CryptonightR_instruction_mov253(); - void CryptonightR_instruction_mov254(); - void CryptonightR_instruction_mov255(); - void CryptonightR_instruction_mov256(); -} - -const void_func instructions[257] = { - CryptonightR_instruction0, - CryptonightR_instruction1, - CryptonightR_instruction2, - CryptonightR_instruction3, - CryptonightR_instruction4, - CryptonightR_instruction5, - CryptonightR_instruction6, - CryptonightR_instruction7, - CryptonightR_instruction8, - CryptonightR_instruction9, - CryptonightR_instruction10, - CryptonightR_instruction11, - CryptonightR_instruction12, - CryptonightR_instruction13, - CryptonightR_instruction14, - CryptonightR_instruction15, - CryptonightR_instruction16, - CryptonightR_instruction17, - CryptonightR_instruction18, - CryptonightR_instruction19, - CryptonightR_instruction20, - CryptonightR_instruction21, - CryptonightR_instruction22, - CryptonightR_instruction23, - CryptonightR_instruction24, - CryptonightR_instruction25, - CryptonightR_instruction26, - CryptonightR_instruction27, - CryptonightR_instruction28, - CryptonightR_instruction29, - CryptonightR_instruction30, - CryptonightR_instruction31, - 
CryptonightR_instruction32, - CryptonightR_instruction33, - CryptonightR_instruction34, - CryptonightR_instruction35, - CryptonightR_instruction36, - CryptonightR_instruction37, - CryptonightR_instruction38, - CryptonightR_instruction39, - CryptonightR_instruction40, - CryptonightR_instruction41, - CryptonightR_instruction42, - CryptonightR_instruction43, - CryptonightR_instruction44, - CryptonightR_instruction45, - CryptonightR_instruction46, - CryptonightR_instruction47, - CryptonightR_instruction48, - CryptonightR_instruction49, - CryptonightR_instruction50, - CryptonightR_instruction51, - CryptonightR_instruction52, - CryptonightR_instruction53, - CryptonightR_instruction54, - CryptonightR_instruction55, - CryptonightR_instruction56, - CryptonightR_instruction57, - CryptonightR_instruction58, - CryptonightR_instruction59, - CryptonightR_instruction60, - CryptonightR_instruction61, - CryptonightR_instruction62, - CryptonightR_instruction63, - CryptonightR_instruction64, - CryptonightR_instruction65, - CryptonightR_instruction66, - CryptonightR_instruction67, - CryptonightR_instruction68, - CryptonightR_instruction69, - CryptonightR_instruction70, - CryptonightR_instruction71, - CryptonightR_instruction72, - CryptonightR_instruction73, - CryptonightR_instruction74, - CryptonightR_instruction75, - CryptonightR_instruction76, - CryptonightR_instruction77, - CryptonightR_instruction78, - CryptonightR_instruction79, - CryptonightR_instruction80, - CryptonightR_instruction81, - CryptonightR_instruction82, - CryptonightR_instruction83, - CryptonightR_instruction84, - CryptonightR_instruction85, - CryptonightR_instruction86, - CryptonightR_instruction87, - CryptonightR_instruction88, - CryptonightR_instruction89, - CryptonightR_instruction90, - CryptonightR_instruction91, - CryptonightR_instruction92, - CryptonightR_instruction93, - CryptonightR_instruction94, - CryptonightR_instruction95, - CryptonightR_instruction96, - CryptonightR_instruction97, - 
CryptonightR_instruction98, - CryptonightR_instruction99, - CryptonightR_instruction100, - CryptonightR_instruction101, - CryptonightR_instruction102, - CryptonightR_instruction103, - CryptonightR_instruction104, - CryptonightR_instruction105, - CryptonightR_instruction106, - CryptonightR_instruction107, - CryptonightR_instruction108, - CryptonightR_instruction109, - CryptonightR_instruction110, - CryptonightR_instruction111, - CryptonightR_instruction112, - CryptonightR_instruction113, - CryptonightR_instruction114, - CryptonightR_instruction115, - CryptonightR_instruction116, - CryptonightR_instruction117, - CryptonightR_instruction118, - CryptonightR_instruction119, - CryptonightR_instruction120, - CryptonightR_instruction121, - CryptonightR_instruction122, - CryptonightR_instruction123, - CryptonightR_instruction124, - CryptonightR_instruction125, - CryptonightR_instruction126, - CryptonightR_instruction127, - CryptonightR_instruction128, - CryptonightR_instruction129, - CryptonightR_instruction130, - CryptonightR_instruction131, - CryptonightR_instruction132, - CryptonightR_instruction133, - CryptonightR_instruction134, - CryptonightR_instruction135, - CryptonightR_instruction136, - CryptonightR_instruction137, - CryptonightR_instruction138, - CryptonightR_instruction139, - CryptonightR_instruction140, - CryptonightR_instruction141, - CryptonightR_instruction142, - CryptonightR_instruction143, - CryptonightR_instruction144, - CryptonightR_instruction145, - CryptonightR_instruction146, - CryptonightR_instruction147, - CryptonightR_instruction148, - CryptonightR_instruction149, - CryptonightR_instruction150, - CryptonightR_instruction151, - CryptonightR_instruction152, - CryptonightR_instruction153, - CryptonightR_instruction154, - CryptonightR_instruction155, - CryptonightR_instruction156, - CryptonightR_instruction157, - CryptonightR_instruction158, - CryptonightR_instruction159, - CryptonightR_instruction160, - CryptonightR_instruction161, - 
CryptonightR_instruction162, - CryptonightR_instruction163, - CryptonightR_instruction164, - CryptonightR_instruction165, - CryptonightR_instruction166, - CryptonightR_instruction167, - CryptonightR_instruction168, - CryptonightR_instruction169, - CryptonightR_instruction170, - CryptonightR_instruction171, - CryptonightR_instruction172, - CryptonightR_instruction173, - CryptonightR_instruction174, - CryptonightR_instruction175, - CryptonightR_instruction176, - CryptonightR_instruction177, - CryptonightR_instruction178, - CryptonightR_instruction179, - CryptonightR_instruction180, - CryptonightR_instruction181, - CryptonightR_instruction182, - CryptonightR_instruction183, - CryptonightR_instruction184, - CryptonightR_instruction185, - CryptonightR_instruction186, - CryptonightR_instruction187, - CryptonightR_instruction188, - CryptonightR_instruction189, - CryptonightR_instruction190, - CryptonightR_instruction191, - CryptonightR_instruction192, - CryptonightR_instruction193, - CryptonightR_instruction194, - CryptonightR_instruction195, - CryptonightR_instruction196, - CryptonightR_instruction197, - CryptonightR_instruction198, - CryptonightR_instruction199, - CryptonightR_instruction200, - CryptonightR_instruction201, - CryptonightR_instruction202, - CryptonightR_instruction203, - CryptonightR_instruction204, - CryptonightR_instruction205, - CryptonightR_instruction206, - CryptonightR_instruction207, - CryptonightR_instruction208, - CryptonightR_instruction209, - CryptonightR_instruction210, - CryptonightR_instruction211, - CryptonightR_instruction212, - CryptonightR_instruction213, - CryptonightR_instruction214, - CryptonightR_instruction215, - CryptonightR_instruction216, - CryptonightR_instruction217, - CryptonightR_instruction218, - CryptonightR_instruction219, - CryptonightR_instruction220, - CryptonightR_instruction221, - CryptonightR_instruction222, - CryptonightR_instruction223, - CryptonightR_instruction224, - CryptonightR_instruction225, - 
CryptonightR_instruction226, - CryptonightR_instruction227, - CryptonightR_instruction228, - CryptonightR_instruction229, - CryptonightR_instruction230, - CryptonightR_instruction231, - CryptonightR_instruction232, - CryptonightR_instruction233, - CryptonightR_instruction234, - CryptonightR_instruction235, - CryptonightR_instruction236, - CryptonightR_instruction237, - CryptonightR_instruction238, - CryptonightR_instruction239, - CryptonightR_instruction240, - CryptonightR_instruction241, - CryptonightR_instruction242, - CryptonightR_instruction243, - CryptonightR_instruction244, - CryptonightR_instruction245, - CryptonightR_instruction246, - CryptonightR_instruction247, - CryptonightR_instruction248, - CryptonightR_instruction249, - CryptonightR_instruction250, - CryptonightR_instruction251, - CryptonightR_instruction252, - CryptonightR_instruction253, - CryptonightR_instruction254, - CryptonightR_instruction255, - CryptonightR_instruction256, -}; - -const void_func instructions_mov[257] = { - CryptonightR_instruction_mov0, - CryptonightR_instruction_mov1, - CryptonightR_instruction_mov2, - CryptonightR_instruction_mov3, - CryptonightR_instruction_mov4, - CryptonightR_instruction_mov5, - CryptonightR_instruction_mov6, - CryptonightR_instruction_mov7, - CryptonightR_instruction_mov8, - CryptonightR_instruction_mov9, - CryptonightR_instruction_mov10, - CryptonightR_instruction_mov11, - CryptonightR_instruction_mov12, - CryptonightR_instruction_mov13, - CryptonightR_instruction_mov14, - CryptonightR_instruction_mov15, - CryptonightR_instruction_mov16, - CryptonightR_instruction_mov17, - CryptonightR_instruction_mov18, - CryptonightR_instruction_mov19, - CryptonightR_instruction_mov20, - CryptonightR_instruction_mov21, - CryptonightR_instruction_mov22, - CryptonightR_instruction_mov23, - CryptonightR_instruction_mov24, - CryptonightR_instruction_mov25, - CryptonightR_instruction_mov26, - CryptonightR_instruction_mov27, - CryptonightR_instruction_mov28, - 
CryptonightR_instruction_mov29, - CryptonightR_instruction_mov30, - CryptonightR_instruction_mov31, - CryptonightR_instruction_mov32, - CryptonightR_instruction_mov33, - CryptonightR_instruction_mov34, - CryptonightR_instruction_mov35, - CryptonightR_instruction_mov36, - CryptonightR_instruction_mov37, - CryptonightR_instruction_mov38, - CryptonightR_instruction_mov39, - CryptonightR_instruction_mov40, - CryptonightR_instruction_mov41, - CryptonightR_instruction_mov42, - CryptonightR_instruction_mov43, - CryptonightR_instruction_mov44, - CryptonightR_instruction_mov45, - CryptonightR_instruction_mov46, - CryptonightR_instruction_mov47, - CryptonightR_instruction_mov48, - CryptonightR_instruction_mov49, - CryptonightR_instruction_mov50, - CryptonightR_instruction_mov51, - CryptonightR_instruction_mov52, - CryptonightR_instruction_mov53, - CryptonightR_instruction_mov54, - CryptonightR_instruction_mov55, - CryptonightR_instruction_mov56, - CryptonightR_instruction_mov57, - CryptonightR_instruction_mov58, - CryptonightR_instruction_mov59, - CryptonightR_instruction_mov60, - CryptonightR_instruction_mov61, - CryptonightR_instruction_mov62, - CryptonightR_instruction_mov63, - CryptonightR_instruction_mov64, - CryptonightR_instruction_mov65, - CryptonightR_instruction_mov66, - CryptonightR_instruction_mov67, - CryptonightR_instruction_mov68, - CryptonightR_instruction_mov69, - CryptonightR_instruction_mov70, - CryptonightR_instruction_mov71, - CryptonightR_instruction_mov72, - CryptonightR_instruction_mov73, - CryptonightR_instruction_mov74, - CryptonightR_instruction_mov75, - CryptonightR_instruction_mov76, - CryptonightR_instruction_mov77, - CryptonightR_instruction_mov78, - CryptonightR_instruction_mov79, - CryptonightR_instruction_mov80, - CryptonightR_instruction_mov81, - CryptonightR_instruction_mov82, - CryptonightR_instruction_mov83, - CryptonightR_instruction_mov84, - CryptonightR_instruction_mov85, - CryptonightR_instruction_mov86, - 
CryptonightR_instruction_mov87, - CryptonightR_instruction_mov88, - CryptonightR_instruction_mov89, - CryptonightR_instruction_mov90, - CryptonightR_instruction_mov91, - CryptonightR_instruction_mov92, - CryptonightR_instruction_mov93, - CryptonightR_instruction_mov94, - CryptonightR_instruction_mov95, - CryptonightR_instruction_mov96, - CryptonightR_instruction_mov97, - CryptonightR_instruction_mov98, - CryptonightR_instruction_mov99, - CryptonightR_instruction_mov100, - CryptonightR_instruction_mov101, - CryptonightR_instruction_mov102, - CryptonightR_instruction_mov103, - CryptonightR_instruction_mov104, - CryptonightR_instruction_mov105, - CryptonightR_instruction_mov106, - CryptonightR_instruction_mov107, - CryptonightR_instruction_mov108, - CryptonightR_instruction_mov109, - CryptonightR_instruction_mov110, - CryptonightR_instruction_mov111, - CryptonightR_instruction_mov112, - CryptonightR_instruction_mov113, - CryptonightR_instruction_mov114, - CryptonightR_instruction_mov115, - CryptonightR_instruction_mov116, - CryptonightR_instruction_mov117, - CryptonightR_instruction_mov118, - CryptonightR_instruction_mov119, - CryptonightR_instruction_mov120, - CryptonightR_instruction_mov121, - CryptonightR_instruction_mov122, - CryptonightR_instruction_mov123, - CryptonightR_instruction_mov124, - CryptonightR_instruction_mov125, - CryptonightR_instruction_mov126, - CryptonightR_instruction_mov127, - CryptonightR_instruction_mov128, - CryptonightR_instruction_mov129, - CryptonightR_instruction_mov130, - CryptonightR_instruction_mov131, - CryptonightR_instruction_mov132, - CryptonightR_instruction_mov133, - CryptonightR_instruction_mov134, - CryptonightR_instruction_mov135, - CryptonightR_instruction_mov136, - CryptonightR_instruction_mov137, - CryptonightR_instruction_mov138, - CryptonightR_instruction_mov139, - CryptonightR_instruction_mov140, - CryptonightR_instruction_mov141, - CryptonightR_instruction_mov142, - CryptonightR_instruction_mov143, - 
CryptonightR_instruction_mov144, - CryptonightR_instruction_mov145, - CryptonightR_instruction_mov146, - CryptonightR_instruction_mov147, - CryptonightR_instruction_mov148, - CryptonightR_instruction_mov149, - CryptonightR_instruction_mov150, - CryptonightR_instruction_mov151, - CryptonightR_instruction_mov152, - CryptonightR_instruction_mov153, - CryptonightR_instruction_mov154, - CryptonightR_instruction_mov155, - CryptonightR_instruction_mov156, - CryptonightR_instruction_mov157, - CryptonightR_instruction_mov158, - CryptonightR_instruction_mov159, - CryptonightR_instruction_mov160, - CryptonightR_instruction_mov161, - CryptonightR_instruction_mov162, - CryptonightR_instruction_mov163, - CryptonightR_instruction_mov164, - CryptonightR_instruction_mov165, - CryptonightR_instruction_mov166, - CryptonightR_instruction_mov167, - CryptonightR_instruction_mov168, - CryptonightR_instruction_mov169, - CryptonightR_instruction_mov170, - CryptonightR_instruction_mov171, - CryptonightR_instruction_mov172, - CryptonightR_instruction_mov173, - CryptonightR_instruction_mov174, - CryptonightR_instruction_mov175, - CryptonightR_instruction_mov176, - CryptonightR_instruction_mov177, - CryptonightR_instruction_mov178, - CryptonightR_instruction_mov179, - CryptonightR_instruction_mov180, - CryptonightR_instruction_mov181, - CryptonightR_instruction_mov182, - CryptonightR_instruction_mov183, - CryptonightR_instruction_mov184, - CryptonightR_instruction_mov185, - CryptonightR_instruction_mov186, - CryptonightR_instruction_mov187, - CryptonightR_instruction_mov188, - CryptonightR_instruction_mov189, - CryptonightR_instruction_mov190, - CryptonightR_instruction_mov191, - CryptonightR_instruction_mov192, - CryptonightR_instruction_mov193, - CryptonightR_instruction_mov194, - CryptonightR_instruction_mov195, - CryptonightR_instruction_mov196, - CryptonightR_instruction_mov197, - CryptonightR_instruction_mov198, - CryptonightR_instruction_mov199, - CryptonightR_instruction_mov200, - 
CryptonightR_instruction_mov201, - CryptonightR_instruction_mov202, - CryptonightR_instruction_mov203, - CryptonightR_instruction_mov204, - CryptonightR_instruction_mov205, - CryptonightR_instruction_mov206, - CryptonightR_instruction_mov207, - CryptonightR_instruction_mov208, - CryptonightR_instruction_mov209, - CryptonightR_instruction_mov210, - CryptonightR_instruction_mov211, - CryptonightR_instruction_mov212, - CryptonightR_instruction_mov213, - CryptonightR_instruction_mov214, - CryptonightR_instruction_mov215, - CryptonightR_instruction_mov216, - CryptonightR_instruction_mov217, - CryptonightR_instruction_mov218, - CryptonightR_instruction_mov219, - CryptonightR_instruction_mov220, - CryptonightR_instruction_mov221, - CryptonightR_instruction_mov222, - CryptonightR_instruction_mov223, - CryptonightR_instruction_mov224, - CryptonightR_instruction_mov225, - CryptonightR_instruction_mov226, - CryptonightR_instruction_mov227, - CryptonightR_instruction_mov228, - CryptonightR_instruction_mov229, - CryptonightR_instruction_mov230, - CryptonightR_instruction_mov231, - CryptonightR_instruction_mov232, - CryptonightR_instruction_mov233, - CryptonightR_instruction_mov234, - CryptonightR_instruction_mov235, - CryptonightR_instruction_mov236, - CryptonightR_instruction_mov237, - CryptonightR_instruction_mov238, - CryptonightR_instruction_mov239, - CryptonightR_instruction_mov240, - CryptonightR_instruction_mov241, - CryptonightR_instruction_mov242, - CryptonightR_instruction_mov243, - CryptonightR_instruction_mov244, - CryptonightR_instruction_mov245, - CryptonightR_instruction_mov246, - CryptonightR_instruction_mov247, - CryptonightR_instruction_mov248, - CryptonightR_instruction_mov249, - CryptonightR_instruction_mov250, - CryptonightR_instruction_mov251, - CryptonightR_instruction_mov252, - CryptonightR_instruction_mov253, - CryptonightR_instruction_mov254, - CryptonightR_instruction_mov255, - CryptonightR_instruction_mov256, -}; diff --git 
a/src/crypto/asm/win64/CryptonightR_template.inc b/src/crypto/asm/win64/CryptonightR_template.inc deleted file mode 100644 index 1dae434a..00000000 --- a/src/crypto/asm/win64/CryptonightR_template.inc +++ /dev/null @@ -1,529 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightR_template_part1) -PUBLIC FN_PREFIX(CryptonightR_template_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_part2) -PUBLIC FN_PREFIX(CryptonightR_template_part3) -PUBLIC FN_PREFIX(CryptonightR_template_end) -PUBLIC FN_PREFIX(CryptonightR_template_double_part1) -PUBLIC FN_PREFIX(CryptonightR_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightR_template_double_part2) -PUBLIC FN_PREFIX(CryptonightR_template_double_part3) -PUBLIC FN_PREFIX(CryptonightR_template_double_part4) -PUBLIC FN_PREFIX(CryptonightR_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightR_template_part1): - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - 
movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - pxor xmm0, xmm2 - pxor xmm5, xmm1 - pxor xmm5, xmm0 - paddq xmm3, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movd r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -FN_PREFIX(CryptonightR_template_part2): - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor rsp, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov rax, r13 - mul r12 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - movaps xmm3, xmm1 - movdqa xmm2, XMMWORD PTR [r9+r11] - movdqa xmm0, XMMWORD PTR [r10+r11] - pxor xmm1, xmm2 - pxor xmm5, xmm0 - pxor xmm5, xmm1 - paddq xmm3, xmm4 - paddq xmm2, xmm6 - paddq xmm0, xmm7 - movdqu XMMWORD PTR [r9+r11], xmm0 - movdqu XMMWORD PTR [r12+r11], xmm2 - movdqu XMMWORD PTR [r10+r11], xmm3 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz FN_PREFIX(CryptonightR_template_mainloop) - -FN_PREFIX(CryptonightR_template_part3): - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - 
movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightR_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightR_template_double_part1): - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd 
xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightR_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm1 - movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - pxor xmm6, xmm0 - movd rdx, xmm6 - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm1 - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, 
xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightR_template_double_part2): - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r14, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, rdx - xor r12, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movdqu xmm1, XMMWORD PTR [rcx+rsi] - pxor xmm6, xmm1 - xor esi, 48 - paddq xmm1, xmm7 - movdqu xmm2, XMMWORD PTR [rsi+rcx] - pxor xmm6, xmm2 - paddq xmm2, xmm3 - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - pxor xmm6, xmm0 - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightR_template_double_part3): - - movd r15, xmm13 - - mov eax, edi - mov edx, ebp - shl rdx, 32 - or rax, rdx - xor r15, rax - - mov eax, ebx - mov edx, esi - shl rdx, 32 - or rax, 
rdx - xor r13, rax - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - mov rdi, rcx - mov r8, rax - movdqu xmm1, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm1 - xor ebp, 48 - paddq xmm1, xmm8 - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm2 - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - pxor xmm5, xmm0 - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightR_template_double_mainloop) - -FN_PREFIX(CryptonightR_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightR_template_double_end): diff --git a/src/crypto/asm/win64/CryptonightR_template_win.inc b/src/crypto/asm/win64/CryptonightR_template_win.inc index 2f2d71a2..60ee3441 100644 --- a/src/crypto/asm/win64/CryptonightR_template_win.inc +++ b/src/crypto/asm/win64/CryptonightR_template_win.inc @@ -70,29 +70,30 @@ CryptonightR_template_mainloop: aesenc xmm5, xmm4 - mov r12d, r9d + mov r13d, r9d mov eax, r9d xor r9d, 48 - xor r12d, 16 + 
xor r13d, 16 xor eax, 32 movdqu xmm0, XMMWORD PTR [r9+r11] movaps xmm3, xmm0 - movdqu xmm2, XMMWORD PTR [r12+r11] + movdqu xmm2, XMMWORD PTR [r13+r11] movdqu xmm1, XMMWORD PTR [rax+r11] pxor xmm0, xmm2 pxor xmm5, xmm1 pxor xmm5, xmm0 + + movd r12, xmm5 + movd r10d, xmm5 + and r10d, 2097136 + paddq xmm3, xmm7 paddq xmm2, xmm6 paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm3 + movdqu XMMWORD PTR [r13+r11], xmm3 movdqu XMMWORD PTR [rax+r11], xmm2 movdqu XMMWORD PTR [r9+r11], xmm1 - movd r12, xmm5 - movd r10d, xmm5 - and r10d, 2097136 - movdqa xmm0, xmm5 pxor xmm0, xmm6 movdqu XMMWORD PTR [rdx], xmm0 @@ -102,14 +103,16 @@ CryptonightR_template_mainloop: shl rdx, 32 or r13, rdx - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - movd eax, xmm6 movd edx, xmm7 pextrd r9d, xmm7, 2 + xor r13, QWORD PTR [r10+r11] + mov r14, QWORD PTR [r10+r11+8] + CryptonightR_template_part2: + lea rcx, [r10+r11] + mov eax, edi mov edx, ebp shl rdx, 32 @@ -124,6 +127,8 @@ CryptonightR_template_part2: mov rax, r13 mul r12 + add r15, rax + add rsp, rdx mov r9d, r10d mov r12d, r10d @@ -145,13 +150,10 @@ CryptonightR_template_part2: movdqu XMMWORD PTR [r10+r11], xmm3 movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp + mov QWORD PTR [rcx], rsp xor rsp, r13 mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 + mov QWORD PTR [rcx+8], r15 and r9d, 2097136 xor r15, r14 movdqa xmm6, xmm5 diff --git a/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc new file mode 100644 index 00000000..68209036 --- /dev/null +++ b/src/crypto/asm/win64/CryptonightWOW_soft_aes_template_win.inc @@ -0,0 +1,266 @@ +PUBLIC CryptonightWOW_soft_aes_template_part1 +PUBLIC CryptonightWOW_soft_aes_template_mainloop +PUBLIC CryptonightWOW_soft_aes_template_part2 +PUBLIC CryptonightWOW_soft_aes_template_part3 +PUBLIC CryptonightWOW_soft_aes_template_end + +ALIGN(64) 
+CryptonightWOW_soft_aes_template_part1: + mov QWORD PTR [rsp+8], rcx + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 232 + + mov eax, [rcx+96] + mov ebx, [rcx+100] + mov esi, [rcx+104] + mov edx, [rcx+108] + mov [rsp+144], eax + mov [rsp+148], ebx + mov [rsp+152], esi + mov [rsp+156], edx + + mov rax, QWORD PTR [rcx+48] + mov r10, rcx + xor rax, QWORD PTR [rcx+16] + mov r8, QWORD PTR [rcx+32] + xor r8, QWORD PTR [rcx] + mov r9, QWORD PTR [rcx+40] + xor r9, QWORD PTR [rcx+8] + movd xmm4, rax + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov r11, QWORD PTR [rcx+224] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r10+72] + mov rax, QWORD PTR [r10+80] + movd xmm0, rdx + xor rax, QWORD PTR [r10+64] + + movaps XMMWORD PTR [rsp+16], xmm6 + movaps XMMWORD PTR [rsp+32], xmm7 + movaps XMMWORD PTR [rsp+48], xmm8 + movaps XMMWORD PTR [rsp+64], xmm9 + movaps XMMWORD PTR [rsp+80], xmm10 + movaps XMMWORD PTR [rsp+96], xmm11 + movaps XMMWORD PTR [rsp+112], xmm12 + movaps XMMWORD PTR [rsp+128], xmm13 + + movd xmm5, rax + + mov rax, r8 + punpcklqdq xmm4, xmm0 + and eax, 2097136 + movd xmm10, QWORD PTR [r10+96] + movd xmm0, rcx + mov rcx, QWORD PTR [r10+104] + xorps xmm9, xmm9 + mov QWORD PTR [rsp+328], rax + movd xmm12, r11 + mov QWORD PTR [rsp+320], r9 + punpcklqdq xmm5, xmm0 + movd xmm13, rcx + mov r12d, 524288 + + ALIGN(64) +CryptonightWOW_soft_aes_template_mainloop: + movd xmm11, r12d + mov r12, QWORD PTR [r10+272] + lea r13, QWORD PTR [rax+r11] + mov esi, DWORD PTR [r13] + movd xmm0, r9 + mov r10d, DWORD PTR [r13+4] + movd xmm7, r8 + mov ebp, DWORD PTR [r13+12] + mov r14d, DWORD PTR [r13+8] + mov rdx, QWORD PTR [rsp+328] + movzx ecx, sil + shr esi, 8 + punpcklqdq xmm7, xmm0 + mov r15d, DWORD PTR [r12+rcx*4] + movzx ecx, r10b + shr r10d, 8 + mov edi, DWORD PTR [r12+rcx*4] + movzx ecx, r14b + shr r14d, 8 + mov ebx, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + shr ebp, 8 + mov r9d, DWORD PTR [r12+rcx*4] + movzx 
ecx, r10b + shr r10d, 8 + xor r15d, DWORD PTR [r12+rcx*4+1024] + movzx ecx, r14b + shr r14d, 8 + mov eax, r14d + shr eax, 8 + xor edi, DWORD PTR [r12+rcx*4+1024] + add eax, 256 + movzx ecx, bpl + shr ebp, 8 + xor ebx, DWORD PTR [r12+rcx*4+1024] + movzx ecx, sil + shr esi, 8 + xor r9d, DWORD PTR [r12+rcx*4+1024] + add r12, 2048 + movzx ecx, r10b + shr r10d, 8 + add r10d, 256 + mov r11d, DWORD PTR [r12+rax*4] + xor r11d, DWORD PTR [r12+rcx*4] + xor r11d, r9d + movzx ecx, sil + mov r10d, DWORD PTR [r12+r10*4] + shr esi, 8 + add esi, 256 + xor r10d, DWORD PTR [r12+rcx*4] + movzx ecx, bpl + xor r10d, ebx + shr ebp, 8 + movd xmm1, r11d + add ebp, 256 + movd r11, xmm12 + mov r9d, DWORD PTR [r12+rcx*4] + xor r9d, DWORD PTR [r12+rsi*4] + mov eax, DWORD PTR [r12+rbp*4] + xor r9d, edi + movzx ecx, r14b + movd xmm0, r10d + movd xmm2, r9d + xor eax, DWORD PTR [r12+rcx*4] + mov rcx, rdx + xor eax, r15d + punpckldq xmm2, xmm1 + xor rcx, 16 + movd xmm6, eax + mov rax, rdx + punpckldq xmm6, xmm0 + xor rax, 32 + punpckldq xmm6, xmm2 + xor rdx, 48 + movdqu xmm2, XMMWORD PTR [rcx+r11] + pxor xmm6, xmm7 + paddq xmm2, xmm4 + movdqu xmm1, XMMWORD PTR [rax+r11] + movdqu xmm0, XMMWORD PTR [rdx+r11] + paddq xmm0, xmm5 + movdqu XMMWORD PTR [rcx+r11], xmm0 + movdqu XMMWORD PTR [rax+r11], xmm2 + movd rcx, xmm13 + paddq xmm1, xmm7 + movdqu XMMWORD PTR [rdx+r11], xmm1 + movd rdi, xmm6 + mov r10, rdi + and r10d, 2097136 + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [r13], xmm0 + + mov ebx, [rsp+144] + mov ebp, [rsp+152] + add ebx, [rsp+148] + add ebp, [rsp+156] + shl rbp, 32 + or rbx, rbp + + xor rbx, QWORD PTR [r10+r11] + lea r14, QWORD PTR [r10+r11] + mov rbp, QWORD PTR [r14+8] + + mov [rsp+160], rbx + mov [rsp+168], rdi + mov [rsp+176], rbp + mov [rsp+184], r10 + mov r10, rsp + + mov ebx, [rsp+144] + mov esi, [rsp+148] + mov edi, [rsp+152] + mov ebp, [rsp+156] + + movd esp, xmm7 + movaps xmm0, xmm7 + psrldq xmm0, 8 + movd r15d, xmm0 + movd eax, xmm4 + movd edx, xmm5 + 
+CryptonightWOW_soft_aes_template_part2: + mov rsp, r10 + mov [rsp+144], ebx + mov [rsp+148], esi + mov [rsp+152], edi + mov [rsp+156], ebp + + mov rbx, [rsp+160] + mov rdi, [rsp+168] + mov rbp, [rsp+176] + mov r10, [rsp+184] + + mov r9, r10 + xor r9, 16 + mov rcx, r10 + xor rcx, 32 + xor r10, 48 + mov rax, rbx + mul rdi + movdqu xmm2, XMMWORD PTR [r9+r11] + movdqu xmm1, XMMWORD PTR [rcx+r11] + paddq xmm1, xmm7 + movd xmm0, rax + movd xmm3, rdx + xor rax, QWORD PTR [r11+rcx+8] + xor rdx, QWORD PTR [rcx+r11] + punpcklqdq xmm3, xmm0 + add r8, rdx + movdqu xmm0, XMMWORD PTR [r10+r11] + pxor xmm2, xmm3 + paddq xmm0, xmm5 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [r9+r11], xmm0 + movdqa xmm5, xmm4 + mov r9, QWORD PTR [rsp+320] + movdqa xmm4, xmm6 + add r9, rax + movdqu XMMWORD PTR [rcx+r11], xmm2 + movdqu XMMWORD PTR [r10+r11], xmm1 + mov r10, QWORD PTR [rsp+304] + movd r12d, xmm11 + mov QWORD PTR [r14], r8 + xor r8, rbx + mov rax, r8 + mov QWORD PTR [r14+8], r9 + and eax, 2097136 + xor r9, rbp + mov QWORD PTR [rsp+320], r9 + mov QWORD PTR [rsp+328], rax + sub r12d, 1 + jne CryptonightWOW_soft_aes_template_mainloop + +CryptonightWOW_soft_aes_template_part3: + movaps xmm6, XMMWORD PTR [rsp+16] + movaps xmm7, XMMWORD PTR [rsp+32] + movaps xmm8, XMMWORD PTR [rsp+48] + movaps xmm9, XMMWORD PTR [rsp+64] + movaps xmm10, XMMWORD PTR [rsp+80] + movaps xmm11, XMMWORD PTR [rsp+96] + movaps xmm12, XMMWORD PTR [rsp+112] + movaps xmm13, XMMWORD PTR [rsp+128] + + add rsp, 232 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + ret +CryptonightWOW_soft_aes_template_end: diff --git a/src/crypto/asm/win64/CryptonightWOW_template.inc b/src/crypto/asm/win64/CryptonightWOW_template.inc deleted file mode 100644 index 47fbc94f..00000000 --- a/src/crypto/asm/win64/CryptonightWOW_template.inc +++ /dev/null @@ -1,486 +0,0 @@ -PUBLIC FN_PREFIX(CryptonightWOW_template_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_mainloop) -PUBLIC 
FN_PREFIX(CryptonightWOW_template_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_end) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part1) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_mainloop) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part2) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part3) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_part4) -PUBLIC FN_PREFIX(CryptonightWOW_template_double_end) - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_part1): - mov QWORD PTR [rsp+16], rbx - mov QWORD PTR [rsp+24], rbp - mov QWORD PTR [rsp+32], rsi - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push rdi - sub rsp, 64 - mov r12, rcx - mov r8, QWORD PTR [r12+32] - mov rdx, r12 - xor r8, QWORD PTR [r12] - mov r15, QWORD PTR [r12+40] - mov r9, r8 - xor r15, QWORD PTR [r12+8] - mov r11, QWORD PTR [r12+224] - mov r12, QWORD PTR [r12+56] - xor r12, QWORD PTR [rdx+24] - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - movaps XMMWORD PTR [rsp+48], xmm6 - movd xmm0, r12 - movaps XMMWORD PTR [rsp+32], xmm7 - movaps XMMWORD PTR [rsp+16], xmm8 - movaps XMMWORD PTR [rsp], xmm9 - mov r12, QWORD PTR [rdx+88] - xor r12, QWORD PTR [rdx+72] - movd xmm6, rax - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm6, xmm0 - and r9d, 2097136 - movd xmm0, r12 - movd xmm7, rax - punpcklqdq xmm7, xmm0 - mov r10d, r9d - movd xmm9, rsp - mov rsp, r8 - mov r8d, 524288 - - mov ebx, [rdx+96] - mov esi, [rdx+100] - mov edi, [rdx+104] - mov ebp, [rdx+108] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_mainloop): - movdqa xmm5, XMMWORD PTR [r9+r11] - movd xmm0, r15 - movd xmm4, rsp - punpcklqdq xmm4, xmm0 - lea rdx, QWORD PTR [r9+r11] - - aesenc xmm5, xmm4 - movd r10d, xmm5 - and r10d, 2097136 - - mov r12d, r9d - mov eax, r9d - xor r9d, 48 - xor r12d, 16 - xor eax, 32 - movdqu xmm0, XMMWORD PTR [r9+r11] - movdqu xmm2, XMMWORD PTR [r12+r11] - movdqu xmm1, XMMWORD PTR [rax+r11] - 
paddq xmm0, xmm7 - paddq xmm2, xmm6 - paddq xmm1, xmm4 - movdqu XMMWORD PTR [r12+r11], xmm0 - movd r12, xmm5 - movdqu XMMWORD PTR [rax+r11], xmm2 - movdqu XMMWORD PTR [r9+r11], xmm1 - - movdqa xmm0, xmm5 - pxor xmm0, xmm6 - movdqu XMMWORD PTR [rdx], xmm0 - - lea r13d, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or r13, rdx - - xor r13, QWORD PTR [r10+r11] - mov r14, QWORD PTR [r10+r11+8] - - movd eax, xmm6 - movd edx, xmm7 - pextrd r9d, xmm7, 2 - -FN_PREFIX(CryptonightWOW_template_part2): - mov rax, r13 - mul r12 - movd xmm0, rax - movd xmm3, rdx - punpcklqdq xmm3, xmm0 - - mov r9d, r10d - mov r12d, r10d - xor r9d, 16 - xor r12d, 32 - xor r10d, 48 - movdqa xmm1, XMMWORD PTR [r12+r11] - xor rdx, QWORD PTR [r12+r11] - xor rax, QWORD PTR [r11+r12+8] - movdqa xmm2, XMMWORD PTR [r9+r11] - pxor xmm3, xmm2 - paddq xmm7, XMMWORD PTR [r10+r11] - paddq xmm1, xmm4 - paddq xmm3, xmm6 - movdqu XMMWORD PTR [r9+r11], xmm7 - movdqu XMMWORD PTR [r12+r11], xmm3 - movdqu XMMWORD PTR [r10+r11], xmm1 - - movdqa xmm7, xmm6 - add r15, rax - add rsp, rdx - xor r10, 48 - mov QWORD PTR [r10+r11], rsp - xor rsp, r13 - mov r9d, esp - mov QWORD PTR [r10+r11+8], r15 - and r9d, 2097136 - xor r15, r14 - movdqa xmm6, xmm5 - dec r8d - jnz FN_PREFIX(CryptonightWOW_template_mainloop) - -FN_PREFIX(CryptonightWOW_template_part3): - movd rsp, xmm9 - - mov rbx, QWORD PTR [rsp+136] - mov rbp, QWORD PTR [rsp+144] - mov rsi, QWORD PTR [rsp+152] - movaps xmm6, XMMWORD PTR [rsp+48] - movaps xmm7, XMMWORD PTR [rsp+32] - movaps xmm8, XMMWORD PTR [rsp+16] - movaps xmm9, XMMWORD PTR [rsp] - add rsp, 64 - pop rdi - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - ret 0 -FN_PREFIX(CryptonightWOW_template_end): - -ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_part1): - mov QWORD PTR [rsp+24], rbx - push rbp - push rsi - push rdi - push r12 - push r13 - push r14 - push r15 - sub rsp, 320 - mov r14, QWORD PTR [rcx+32] - mov r8, rcx - xor r14, QWORD PTR [rcx] - mov r12, QWORD PTR [rcx+40] - mov ebx, 
r14d - mov rsi, QWORD PTR [rcx+224] - and ebx, 2097136 - xor r12, QWORD PTR [rcx+8] - mov rcx, QWORD PTR [rcx+56] - xor rcx, QWORD PTR [r8+24] - mov rax, QWORD PTR [r8+48] - xor rax, QWORD PTR [r8+16] - mov r15, QWORD PTR [rdx+32] - xor r15, QWORD PTR [rdx] - movd xmm0, rcx - mov rcx, QWORD PTR [r8+88] - xor rcx, QWORD PTR [r8+72] - mov r13, QWORD PTR [rdx+40] - mov rdi, QWORD PTR [rdx+224] - xor r13, QWORD PTR [rdx+8] - movaps XMMWORD PTR [rsp+160], xmm6 - movaps XMMWORD PTR [rsp+176], xmm7 - movaps XMMWORD PTR [rsp+192], xmm8 - movaps XMMWORD PTR [rsp+208], xmm9 - movaps XMMWORD PTR [rsp+224], xmm10 - movaps XMMWORD PTR [rsp+240], xmm11 - movaps XMMWORD PTR [rsp+256], xmm12 - movaps XMMWORD PTR [rsp+272], xmm13 - movaps XMMWORD PTR [rsp+288], xmm14 - movaps XMMWORD PTR [rsp+304], xmm15 - movd xmm7, rax - mov rax, QWORD PTR [r8+80] - xor rax, QWORD PTR [r8+64] - - movaps xmm1, XMMWORD PTR [rdx+96] - movaps xmm2, XMMWORD PTR [r8+96] - movaps XMMWORD PTR [rsp], xmm1 - movaps XMMWORD PTR [rsp+16], xmm2 - - mov r8d, r15d - punpcklqdq xmm7, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+56] - xor rcx, QWORD PTR [rdx+24] - movd xmm9, rax - mov QWORD PTR [rsp+128], rsi - mov rax, QWORD PTR [rdx+48] - xor rax, QWORD PTR [rdx+16] - punpcklqdq xmm9, xmm0 - movd xmm0, rcx - mov rcx, QWORD PTR [rdx+88] - xor rcx, QWORD PTR [rdx+72] - movd xmm8, rax - mov QWORD PTR [rsp+136], rdi - mov rax, QWORD PTR [rdx+80] - xor rax, QWORD PTR [rdx+64] - punpcklqdq xmm8, xmm0 - and r8d, 2097136 - movd xmm0, rcx - mov r11d, 524288 - movd xmm10, rax - punpcklqdq xmm10, xmm0 - - movd xmm14, QWORD PTR [rsp+128] - movd xmm15, QWORD PTR [rsp+136] - - ALIGN(64) -FN_PREFIX(CryptonightWOW_template_double_mainloop): - movdqu xmm6, XMMWORD PTR [rbx+rsi] - movd xmm0, r12 - mov ecx, ebx - movd xmm3, r14 - punpcklqdq xmm3, xmm0 - xor ebx, 16 - aesenc xmm6, xmm3 - movd rdx, xmm6 - movd xmm4, r15 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - xor ebx, 48 - paddq xmm0, xmm7 - movdqu xmm1, XMMWORD PTR [rbx+rsi] - 
movdqu XMMWORD PTR [rbx+rsi], xmm0 - paddq xmm1, xmm3 - xor ebx, 16 - mov eax, ebx - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbx+rsi] - movdqu XMMWORD PTR [rbx+rsi], xmm1 - paddq xmm0, xmm9 - movdqu XMMWORD PTR [rax+rsi], xmm0 - movdqa xmm0, xmm6 - pxor xmm0, xmm7 - movdqu XMMWORD PTR [rcx+rsi], xmm0 - mov esi, edx - movdqu xmm5, XMMWORD PTR [r8+rdi] - and esi, 2097136 - mov ecx, r8d - movd xmm0, r13 - punpcklqdq xmm4, xmm0 - xor r8d, 16 - aesenc xmm5, xmm4 - movdqu xmm0, XMMWORD PTR [r8+rdi] - xor r8d, 48 - paddq xmm0, xmm8 - movdqu xmm1, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm0 - paddq xmm1, xmm4 - xor r8d, 16 - mov eax, r8d - xor rax, 32 - movdqu xmm0, XMMWORD PTR [r8+rdi] - movdqu XMMWORD PTR [r8+rdi], xmm1 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rdi], xmm0 - movdqa xmm0, xmm5 - pxor xmm0, xmm8 - movdqu XMMWORD PTR [rcx+rdi], xmm0 - movd rdi, xmm5 - movd rcx, xmm14 - mov ebp, edi - mov r8, QWORD PTR [rcx+rsi] - mov r10, QWORD PTR [rcx+rsi+8] - lea r9, QWORD PTR [rcx+rsi] - xor esi, 16 - - movd xmm0, rsp - movd xmm1, rsi - movd xmm2, rdi - movd xmm11, rbp - movd xmm12, r15 - movd xmm13, rdx - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp+16] - mov esi, DWORD PTR [rsp+20] - mov edi, DWORD PTR [rsp+24] - mov ebp, DWORD PTR [rsp+28] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - xor r8, rax - - movd esp, xmm3 - pextrd r15d, xmm3, 2 - movd eax, xmm7 - movd edx, xmm9 - pextrd r9d, xmm9, 2 - -FN_PREFIX(CryptonightWOW_template_double_part2): - - movd rsp, xmm0 - mov DWORD PTR [rsp+16], ebx - mov DWORD PTR [rsp+20], esi - mov DWORD PTR [rsp+24], edi - mov DWORD PTR [rsp+28], ebp - - movd rsi, xmm1 - movd rdi, xmm2 - movd rbp, xmm11 - movd r15, xmm12 - movd rdx, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rbx, r8 - mov rax, r8 - mul rdx - and ebp, 2097136 - mov r8, rax - movd xmm1, rdx - movd xmm0, r8 - punpcklqdq xmm1, xmm0 - pxor xmm1, XMMWORD PTR [rcx+rsi] - xor esi, 48 - paddq xmm1, xmm7 
- movdqu xmm2, XMMWORD PTR [rsi+rcx] - xor rdx, QWORD PTR [rsi+rcx] - paddq xmm2, xmm3 - xor r8, QWORD PTR [rsi+rcx+8] - movdqu XMMWORD PTR [rsi+rcx], xmm1 - xor esi, 16 - mov eax, esi - mov rsi, rcx - movdqu xmm0, XMMWORD PTR [rax+rcx] - movdqu XMMWORD PTR [rax+rcx], xmm2 - paddq xmm0, xmm9 - add r12, r8 - xor rax, 32 - add r14, rdx - movdqa xmm9, xmm7 - movdqa xmm7, xmm6 - movdqu XMMWORD PTR [rax+rcx], xmm0 - mov QWORD PTR [r9+8], r12 - xor r12, r10 - mov QWORD PTR [r9], r14 - movd rcx, xmm15 - xor r14, rbx - mov r10d, ebp - mov ebx, r14d - xor ebp, 16 - and ebx, 2097136 - mov r8, QWORD PTR [r10+rcx] - mov r9, QWORD PTR [r10+rcx+8] - - movd xmm0, rsp - movd xmm1, rbx - movd xmm2, rsi - movd xmm11, rdi - movd xmm12, rbp - movd xmm13, r15 - mov [rsp+104], rcx - mov [rsp+112], r9 - - mov ebx, DWORD PTR [rsp] - mov esi, DWORD PTR [rsp+4] - mov edi, DWORD PTR [rsp+8] - mov ebp, DWORD PTR [rsp+12] - - lea eax, [ebx+esi] - lea edx, [edi+ebp] - shl rdx, 32 - or rax, rdx - - xor r8, rax - movd xmm3, r8 - - movd esp, xmm4 - pextrd r15d, xmm4, 2 - movd eax, xmm8 - movd edx, xmm10 - pextrd r9d, xmm10, 2 - -FN_PREFIX(CryptonightWOW_template_double_part3): - - movd rsp, xmm0 - mov DWORD PTR [rsp], ebx - mov DWORD PTR [rsp+4], esi - mov DWORD PTR [rsp+8], edi - mov DWORD PTR [rsp+12], ebp - - movd rbx, xmm1 - movd rsi, xmm2 - movd rdi, xmm11 - movd rbp, xmm12 - movd r15, xmm13 - mov rcx, [rsp+104] - mov r9, [rsp+112] - - mov rax, r8 - mul rdi - movd xmm1, rdx - movd xmm0, rax - punpcklqdq xmm1, xmm0 - mov rdi, rcx - mov r8, rax - pxor xmm1, XMMWORD PTR [rbp+rcx] - xor ebp, 48 - paddq xmm1, xmm8 - xor r8, QWORD PTR [rbp+rcx+8] - xor rdx, QWORD PTR [rbp+rcx] - add r13, r8 - movdqu xmm2, XMMWORD PTR [rbp+rcx] - add r15, rdx - movdqu XMMWORD PTR [rbp+rcx], xmm1 - paddq xmm2, xmm4 - xor ebp, 16 - mov eax, ebp - xor rax, 32 - movdqu xmm0, XMMWORD PTR [rbp+rcx] - movdqu XMMWORD PTR [rbp+rcx], xmm2 - paddq xmm0, xmm10 - movdqu XMMWORD PTR [rax+rcx], xmm0 - movd rax, xmm3 - movdqa 
xmm10, xmm8 - mov QWORD PTR [r10+rcx], r15 - movdqa xmm8, xmm5 - xor r15, rax - mov QWORD PTR [r10+rcx+8], r13 - mov r8d, r15d - xor r13, r9 - and r8d, 2097136 - dec r11d - jnz FN_PREFIX(CryptonightWOW_template_double_mainloop) - -FN_PREFIX(CryptonightWOW_template_double_part4): - - mov rbx, QWORD PTR [rsp+400] - movaps xmm6, XMMWORD PTR [rsp+160] - movaps xmm7, XMMWORD PTR [rsp+176] - movaps xmm8, XMMWORD PTR [rsp+192] - movaps xmm9, XMMWORD PTR [rsp+208] - movaps xmm10, XMMWORD PTR [rsp+224] - movaps xmm11, XMMWORD PTR [rsp+240] - movaps xmm12, XMMWORD PTR [rsp+256] - movaps xmm13, XMMWORD PTR [rsp+272] - movaps xmm14, XMMWORD PTR [rsp+288] - movaps xmm15, XMMWORD PTR [rsp+304] - add rsp, 320 - pop r15 - pop r14 - pop r13 - pop r12 - pop rdi - pop rsi - pop rbp - ret 0 -FN_PREFIX(CryptonightWOW_template_double_end): diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc new file mode 100644 index 00000000..69ca8793 --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_double_main_loop.inc @@ -0,0 +1,410 @@ + mov rax, rsp + push rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 184 + + stmxcsr DWORD PTR [rsp+272] + mov DWORD PTR [rsp+276], 24448 + ldmxcsr DWORD PTR [rsp+276] + + mov r13, QWORD PTR [rcx+224] + mov r9, rdx + mov r10, QWORD PTR [rcx+32] + mov r8, rcx + xor r10, QWORD PTR [rcx] + mov r14d, 393216 + mov r11, QWORD PTR [rcx+40] + xor r11, QWORD PTR [rcx+8] + mov rsi, QWORD PTR [rdx+224] + mov rdx, QWORD PTR [rcx+56] + xor rdx, QWORD PTR [rcx+24] + mov rdi, QWORD PTR [r9+32] + xor rdi, QWORD PTR [r9] + mov rbp, QWORD PTR [r9+40] + xor rbp, QWORD PTR [r9+8] + movd xmm0, rdx + movaps XMMWORD PTR [rax-88], xmm6 + movaps XMMWORD PTR [rax-104], xmm7 + movaps XMMWORD PTR [rax-120], xmm8 + movaps XMMWORD PTR [rsp+112], xmm9 + movaps XMMWORD PTR [rsp+96], xmm10 + movaps XMMWORD PTR [rsp+80], xmm11 + movaps XMMWORD PTR [rsp+64], xmm12 + movaps XMMWORD 
PTR [rsp+48], xmm13 + movaps XMMWORD PTR [rsp+32], xmm14 + movaps XMMWORD PTR [rsp+16], xmm15 + mov rdx, r10 + movd xmm4, QWORD PTR [r8+96] + and edx, 2097136 + mov rax, QWORD PTR [rcx+48] + xorps xmm13, xmm13 + xor rax, QWORD PTR [rcx+16] + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r8+72] + movd xmm5, QWORD PTR [r8+104] + movd xmm7, rax + + mov eax, 1 + shl rax, 52 + movd xmm14, rax + punpcklqdq xmm14, xmm14 + + mov eax, 1023 + shl rax, 52 + movd xmm12, rax + punpcklqdq xmm12, xmm12 + + mov rax, QWORD PTR [r8+80] + xor rax, QWORD PTR [r8+64] + punpcklqdq xmm7, xmm0 + movd xmm0, rcx + mov rcx, QWORD PTR [r9+56] + xor rcx, QWORD PTR [r9+24] + movd xmm3, rax + mov rax, QWORD PTR [r9+48] + xor rax, QWORD PTR [r9+16] + punpcklqdq xmm3, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp], r13 + mov rcx, QWORD PTR [r9+88] + xor rcx, QWORD PTR [r9+72] + movd xmm6, rax + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + punpcklqdq xmm6, xmm0 + movd xmm0, rcx + mov QWORD PTR [rsp+256], r10 + mov rcx, rdi + mov QWORD PTR [rsp+264], r11 + movd xmm8, rax + and ecx, 2097136 + punpcklqdq xmm8, xmm0 + movd xmm0, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, QWORD PTR [r9+104] + lea r8, QWORD PTR [rcx+rsi] + movdqu xmm11, XMMWORD PTR [r8] + punpcklqdq xmm5, xmm0 + lea r9, QWORD PTR [rdx+r13] + movdqu xmm15, XMMWORD PTR [r9] + + ALIGN(64) +rwz_main_loop_double: + movdqu xmm9, xmm15 + mov eax, edx + mov ebx, edx + xor eax, 16 + xor ebx, 32 + xor edx, 48 + + movd xmm0, r11 + movd xmm2, r10 + punpcklqdq xmm2, xmm0 + aesenc xmm9, xmm2 + + movdqu xmm0, XMMWORD PTR [rdx+r13] + movdqu xmm1, XMMWORD PTR [rbx+r13] + paddq xmm0, xmm7 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [rbx+r13], xmm0 + movdqu xmm0, XMMWORD PTR [rax+r13] + movdqu XMMWORD PTR [rdx+r13], xmm1 + paddq xmm0, xmm3 + movdqu XMMWORD PTR [rax+r13], xmm0 + + movd r11, xmm9 + mov edx, r11d + and edx, 2097136 + movdqa xmm0, xmm9 + pxor xmm0, xmm7 + movdqu XMMWORD PTR [r9], xmm0 + + lea rbx, QWORD PTR [rdx+r13] + 
mov r10, QWORD PTR [rdx+r13] + + movdqu xmm10, xmm11 + movd xmm0, rbp + movd xmm11, rdi + punpcklqdq xmm11, xmm0 + aesenc xmm10, xmm11 + + mov eax, ecx + mov r12d, ecx + xor eax, 16 + xor r12d, 32 + xor ecx, 48 + + movdqu xmm0, XMMWORD PTR [rcx+rsi] + paddq xmm0, xmm6 + movdqu xmm1, XMMWORD PTR [r12+rsi] + movdqu XMMWORD PTR [r12+rsi], xmm0 + paddq xmm1, xmm11 + movdqu xmm0, XMMWORD PTR [rax+rsi] + movdqu XMMWORD PTR [rcx+rsi], xmm1 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [rax+rsi], xmm0 + + movd rcx, xmm10 + and ecx, 2097136 + + movdqa xmm0, xmm10 + pxor xmm0, xmm6 + movdqu XMMWORD PTR [r8], xmm0 + mov r12, QWORD PTR [rcx+rsi] + + mov r9, QWORD PTR [rbx+8] + + xor edx, 16 + mov r8d, edx + mov r15d, edx + + movd rdx, xmm5 + shl rdx, 32 + movd rax, xmm4 + xor rdx, rax + xor r10, rdx + mov rax, r10 + mul r11 + mov r11d, r8d + xor r11d, 48 + movd xmm0, rdx + xor rdx, [r11+r13] + movd xmm1, rax + xor rax, [r11+r13+8] + punpcklqdq xmm0, xmm1 + + pxor xmm0, XMMWORD PTR [r8+r13] + movdqu xmm1, XMMWORD PTR [r11+r13] + paddq xmm0, xmm3 + paddq xmm1, xmm2 + movdqu XMMWORD PTR [r8+r13], xmm0 + xor r8d, 32 + movdqu xmm0, XMMWORD PTR [r8+r13] + movdqu XMMWORD PTR [r8+r13], xmm1 + paddq xmm0, xmm7 + movdqu XMMWORD PTR [r11+r13], xmm0 + + mov r11, QWORD PTR [rsp+256] + add r11, rdx + mov rdx, QWORD PTR [rsp+264] + add rdx, rax + mov QWORD PTR [rbx], r11 + xor r11, r10 + mov QWORD PTR [rbx+8], rdx + xor rdx, r9 + mov QWORD PTR [rsp+256], r11 + and r11d, 2097136 + mov QWORD PTR [rsp+264], rdx + mov QWORD PTR [rsp+8], r11 + lea r15, QWORD PTR [r11+r13] + movdqu xmm15, XMMWORD PTR [r11+r13] + lea r13, QWORD PTR [rsi+rcx] + movdqa xmm0, xmm5 + psrldq xmm0, 8 + movaps xmm2, xmm13 + movd r10, xmm0 + psllq xmm5, 1 + shl r10, 32 + movdqa xmm0, xmm9 + psrldq xmm0, 8 + movdqa xmm1, xmm10 + movd r11, xmm0 + psrldq xmm1, 8 + movd r8, xmm1 + psrldq xmm4, 8 + movaps xmm0, xmm13 + movd rax, xmm4 + xor r10, rax + movaps xmm1, xmm13 + xor r10, r12 + lea rax, QWORD PTR [r11+1] + shr rax, 1 + movdqa 
xmm3, xmm9 + punpcklqdq xmm3, xmm10 + paddq xmm5, xmm3 + movd rdx, xmm5 + psrldq xmm5, 8 + cvtsi2sd xmm2, rax + or edx, -2147483647 + lea rax, QWORD PTR [r8+1] + shr rax, 1 + movd r9, xmm5 + cvtsi2sd xmm0, rax + or r9d, -2147483647 + cvtsi2sd xmm1, rdx + unpcklpd xmm2, xmm0 + movaps xmm0, xmm13 + cvtsi2sd xmm0, r9 + unpcklpd xmm1, xmm0 + divpd xmm2, xmm1 + paddq xmm2, xmm14 + cvttsd2si rax, xmm2 + psrldq xmm2, 8 + mov rbx, rax + imul rax, rdx + sub r11, rax + js rwz_div_fix_1 +rwz_div_fix_1_ret: + + cvttsd2si rdx, xmm2 + mov rax, rdx + imul rax, r9 + movd xmm2, r11d + movd xmm4, ebx + sub r8, rax + js rwz_div_fix_2 +rwz_div_fix_2_ret: + + movd xmm1, r8d + movd xmm0, edx + punpckldq xmm2, xmm1 + punpckldq xmm4, xmm0 + punpckldq xmm4, xmm2 + paddq xmm3, xmm4 + movdqa xmm0, xmm3 + psrlq xmm0, 12 + paddq xmm0, xmm12 + sqrtpd xmm1, xmm0 + movd r9, xmm1 + movdqa xmm5, xmm1 + psrlq xmm5, 19 + test r9, 524287 + je rwz_sqrt_fix_1 +rwz_sqrt_fix_1_ret: + + movd r9, xmm10 + psrldq xmm1, 8 + movd r8, xmm1 + test r8, 524287 + je rwz_sqrt_fix_2 +rwz_sqrt_fix_2_ret: + + mov r12d, ecx + mov r8d, ecx + xor r12d, 16 + xor r8d, 32 + xor ecx, 48 + mov rax, r10 + mul r9 + movd xmm0, rax + movd xmm3, rdx + punpcklqdq xmm3, xmm0 + + movdqu xmm0, XMMWORD PTR [r12+rsi] + pxor xmm0, xmm3 + movdqu xmm1, XMMWORD PTR [r8+rsi] + xor rdx, [r8+rsi] + xor rax, [r8+rsi+8] + movdqu xmm3, XMMWORD PTR [rcx+rsi] + paddq xmm3, xmm6 + paddq xmm1, xmm11 + paddq xmm0, xmm8 + movdqu XMMWORD PTR [r8+rsi], xmm3 + movdqu XMMWORD PTR [rcx+rsi], xmm1 + movdqu XMMWORD PTR [r12+rsi], xmm0 + + add rdi, rdx + mov QWORD PTR [r13], rdi + xor rdi, r10 + mov ecx, edi + and ecx, 2097136 + lea r8, QWORD PTR [rcx+rsi] + + mov rdx, QWORD PTR [r13+8] + add rbp, rax + mov QWORD PTR [r13+8], rbp + movdqu xmm11, XMMWORD PTR [rcx+rsi] + xor rbp, rdx + mov r13, QWORD PTR [rsp] + movdqa xmm3, xmm7 + mov rdx, QWORD PTR [rsp+8] + movdqa xmm8, xmm6 + mov r10, QWORD PTR [rsp+256] + movdqa xmm7, xmm9 + mov r11, QWORD PTR [rsp+264] + 
movdqa xmm6, xmm10 + mov r9, r15 + dec r14d + jne rwz_main_loop_double + + ldmxcsr DWORD PTR [rsp+272] + movaps xmm13, XMMWORD PTR [rsp+48] + lea r11, QWORD PTR [rsp+184] + movaps xmm6, XMMWORD PTR [r11-24] + movaps xmm7, XMMWORD PTR [r11-40] + movaps xmm8, XMMWORD PTR [r11-56] + movaps xmm9, XMMWORD PTR [r11-72] + movaps xmm10, XMMWORD PTR [r11-88] + movaps xmm11, XMMWORD PTR [r11-104] + movaps xmm12, XMMWORD PTR [r11-120] + movaps xmm14, XMMWORD PTR [rsp+32] + movaps xmm15, XMMWORD PTR [rsp+16] + mov rsp, r11 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + pop rbx + jmp rwz_cnv2_double_mainloop_asm_endp + +rwz_div_fix_1: + dec rbx + add r11, rdx + jmp rwz_div_fix_1_ret + +rwz_div_fix_2: + dec rdx + add r8, r9 + jmp rwz_div_fix_2_ret + +rwz_sqrt_fix_1: + movd r8, xmm3 + movdqa xmm0, xmm5 + psrldq xmm0, 8 + dec r9 + mov r11d, -1022 + shl r11, 32 + mov rax, r9 + shr r9, 19 + shr rax, 20 + mov rdx, r9 + sub rdx, rax + lea rdx, [rdx+r11+1] + add rax, r11 + imul rdx, rax + sub rdx, r8 + adc r9, 0 + movd xmm5, r9 + punpcklqdq xmm5, xmm0 + jmp rwz_sqrt_fix_1_ret + +rwz_sqrt_fix_2: + psrldq xmm3, 8 + movd r11, xmm3 + dec r8 + mov ebx, -1022 + shl rbx, 32 + mov rax, r8 + shr r8, 19 + shr rax, 20 + mov rdx, r8 + sub rdx, rax + lea rdx, [rdx+rbx+1] + add rax, rbx + imul rdx, rax + sub rdx, r11 + adc r8, 0 + movd xmm0, r8 + punpcklqdq xmm5, xmm0 + jmp rwz_sqrt_fix_2_ret + +rwz_cnv2_double_mainloop_asm_endp: diff --git a/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc new file mode 100644 index 00000000..99317730 --- /dev/null +++ b/src/crypto/asm/win64/cn2/cnv2_rwz_main_loop.inc @@ -0,0 +1,186 @@ + mov QWORD PTR [rsp+24], rbx + push rbp + push rsi + push rdi + push r12 + push r13 + push r14 + push r15 + sub rsp, 80 + + stmxcsr DWORD PTR [rsp] + mov DWORD PTR [rsp+4], 24448 + ldmxcsr DWORD PTR [rsp+4] + + mov rax, QWORD PTR [rcx+48] + mov r9, rcx + xor rax, QWORD PTR [rcx+16] + mov esi, 393216 + mov r8, 
QWORD PTR [rcx+32] + mov r13d, -2147483647 + xor r8, QWORD PTR [rcx] + mov r11, QWORD PTR [rcx+40] + mov r10, r8 + mov rdx, QWORD PTR [rcx+56] + movd xmm4, rax + xor rdx, QWORD PTR [rcx+24] + xor r11, QWORD PTR [rcx+8] + mov rbx, QWORD PTR [rcx+224] + mov rax, QWORD PTR [r9+80] + xor rax, QWORD PTR [r9+64] + movd xmm0, rdx + mov rcx, QWORD PTR [rcx+88] + xor rcx, QWORD PTR [r9+72] + movd xmm3, QWORD PTR [r9+104] + movaps XMMWORD PTR [rsp+64], xmm6 + movaps XMMWORD PTR [rsp+48], xmm7 + movaps XMMWORD PTR [rsp+32], xmm8 + and r10d, 2097136 + movd xmm5, rax + + xor eax, eax + mov QWORD PTR [rsp+16], rax + + mov ax, 1023 + shl rax, 52 + movd xmm8, rax + mov r15, QWORD PTR [r9+96] + punpcklqdq xmm4, xmm0 + movd xmm0, rcx + punpcklqdq xmm5, xmm0 + movdqu xmm6, XMMWORD PTR [r10+rbx] + + ALIGN(64) +rwz_main_loop: + lea rdx, QWORD PTR [r10+rbx] + mov ecx, r10d + mov eax, r10d + mov rdi, r15 + xor ecx, 16 + xor eax, 32 + xor r10d, 48 + movd xmm0, r11 + movd xmm7, r8 + punpcklqdq xmm7, xmm0 + aesenc xmm6, xmm7 + movd rbp, xmm6 + mov r9, rbp + and r9d, 2097136 + movdqu xmm0, XMMWORD PTR [rcx+rbx] + movdqu xmm1, XMMWORD PTR [rax+rbx] + movdqu xmm2, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm5 + paddq xmm1, xmm7 + paddq xmm2, xmm4 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [rax+rbx], xmm2 + movdqu XMMWORD PTR [r10+rbx], xmm1 + mov r10, r9 + xor r10d, 32 + movd rcx, xmm3 + mov rax, rcx + shl rax, 32 + xor rdi, rax + movdqa xmm0, xmm6 + pxor xmm0, xmm4 + movdqu XMMWORD PTR [rdx], xmm0 + xor rdi, QWORD PTR [r9+rbx] + lea r14, QWORD PTR [r9+rbx] + mov r12, QWORD PTR [r14+8] + xor edx, edx + lea r9d, DWORD PTR [ecx+ecx] + add r9d, ebp + movdqa xmm0, xmm6 + psrldq xmm0, 8 + or r9d, r13d + movd rax, xmm0 + div r9 + xorps xmm3, xmm3 + mov eax, eax + shl rdx, 32 + add rdx, rax + lea r9, QWORD PTR [rdx+rbp] + mov r15, rdx + mov rax, r9 + shr rax, 12 + movd xmm0, rax + paddq xmm0, xmm8 + sqrtsd xmm3, xmm0 + psubq xmm3, XMMWORD PTR [rsp+16] + movd rdx, xmm3 + test edx, 524287 + 
je rwz_sqrt_fixup + psrlq xmm3, 19 +rwz_sqrt_fixup_ret: + + mov ecx, r10d + mov rax, rdi + mul rbp + movd xmm2, rdx + xor rdx, [rcx+rbx] + add r8, rdx + mov QWORD PTR [r14], r8 + xor r8, rdi + mov edi, r8d + and edi, 2097136 + movd xmm0, rax + xor rax, [rcx+rbx+8] + add r11, rax + mov QWORD PTR [r14+8], r11 + punpcklqdq xmm2, xmm0 + + mov r9d, r10d + xor r9d, 48 + xor r10d, 16 + pxor xmm2, XMMWORD PTR [r9+rbx] + movdqu xmm0, XMMWORD PTR [r10+rbx] + paddq xmm0, xmm4 + movdqu xmm1, XMMWORD PTR [rcx+rbx] + paddq xmm2, xmm5 + paddq xmm1, xmm7 + movdqa xmm5, xmm4 + movdqu XMMWORD PTR [r9+rbx], xmm2 + movdqa xmm4, xmm6 + movdqu XMMWORD PTR [rcx+rbx], xmm0 + movdqu XMMWORD PTR [r10+rbx], xmm1 + movdqu xmm6, [rdi+rbx] + mov r10d, edi + xor r11, r12 + dec rsi + jne rwz_main_loop + + ldmxcsr DWORD PTR [rsp] + mov rbx, QWORD PTR [rsp+160] + movaps xmm6, XMMWORD PTR [rsp+64] + movaps xmm7, XMMWORD PTR [rsp+48] + movaps xmm8, XMMWORD PTR [rsp+32] + add rsp, 80 + pop r15 + pop r14 + pop r13 + pop r12 + pop rdi + pop rsi + pop rbp + jmp cnv2_rwz_main_loop_endp + +rwz_sqrt_fixup: + dec rdx + mov r13d, -1022 + shl r13, 32 + mov rax, rdx + shr rdx, 19 + shr rax, 20 + mov rcx, rdx + sub rcx, rax + add rax, r13 + not r13 + sub rcx, r13 + mov r13d, -2147483647 + imul rcx, rax + sub rcx, r9 + adc rdx, 0 + movd xmm3, rdx + jmp rwz_sqrt_fixup_ret + +cnv2_rwz_main_loop_endp: diff --git a/src/crypto/asm/win64/cn_main_loop.S b/src/crypto/asm/win64/cn_main_loop.S index 1200c4df..63c3a8ba 100644 --- a/src/crypto/asm/win64/cn_main_loop.S +++ b/src/crypto/asm/win64/cn_main_loop.S @@ -5,6 +5,8 @@ .global cnv2_mainloop_ryzen_asm .global cnv2_mainloop_bulldozer_asm .global cnv2_double_mainloop_sandybridge_asm +.global cnv2_rwz_mainloop_asm +.global cnv2_rwz_double_mainloop_asm ALIGN(64) cnv2_mainloop_ivybridge_asm: @@ -29,3 +31,15 @@ cnv2_double_mainloop_sandybridge_asm: #include "../cn2/cnv2_double_main_loop_sandybridge.inc" ret 0 mov eax, 3735929054 + +ALIGN(64) +cnv2_rwz_mainloop_asm: + #include 
"cn2/cnv2_rwz_main_loop.inc" + ret 0 + mov eax, 3735929054 + +ALIGN(64) +cnv2_rwz_double_mainloop_asm: + #include "cn2/cnv2_rwz_double_main_loop.inc" + ret 0 + mov eax, 3735929054 diff --git a/src/crypto/asm/win64/cn_main_loop.asm b/src/crypto/asm/win64/cn_main_loop.asm index 846b860c..57246cf5 100644 --- a/src/crypto/asm/win64/cn_main_loop.asm +++ b/src/crypto/asm/win64/cn_main_loop.asm @@ -3,6 +3,8 @@ PUBLIC cnv2_mainloop_ivybridge_asm PUBLIC cnv2_mainloop_ryzen_asm PUBLIC cnv2_mainloop_bulldozer_asm PUBLIC cnv2_double_mainloop_sandybridge_asm +PUBLIC cnv2_rwz_mainloop_asm +PUBLIC cnv2_rwz_double_mainloop_asm ALIGN 64 cnv2_mainloop_ivybridge_asm PROC @@ -32,5 +34,19 @@ cnv2_double_mainloop_sandybridge_asm PROC mov eax, 3735929054 cnv2_double_mainloop_sandybridge_asm ENDP +ALIGN(64) +cnv2_rwz_mainloop_asm PROC + INCLUDE cn2/cnv2_rwz_main_loop.inc + ret 0 + mov eax, 3735929054 +cnv2_rwz_mainloop_asm ENDP + +ALIGN(64) +cnv2_rwz_double_mainloop_asm PROC + INCLUDE cn2/cnv2_rwz_double_main_loop.inc + ret 0 + mov eax, 3735929054 +cnv2_rwz_double_mainloop_asm ENDP + _TEXT_CNV2_MAINLOOP ENDS END diff --git a/src/nvidia/cuda_core.cu b/src/nvidia/cuda_core.cu index daadcc94..a80d2de5 100644 --- a/src/nvidia/cuda_core.cu +++ b/src/nvidia/cuda_core.cu @@ -357,8 +357,8 @@ __global__ void cryptonight_core_gpu_phase2_double( __syncthreads(); # endif - myChunks[idx1 ^ 2 + sub] = chunk3 + bx1; - myChunks[idx1 ^ 4 + sub] = chunk1 + bx0; + myChunks[idx1 ^ 2 + sub] = ((VARIANT == xmrig::VARIANT_RWZ) ? chunk1 : chunk3) + bx1; + myChunks[idx1 ^ 4 + sub] = ((VARIANT == xmrig::VARIANT_RWZ) ? chunk3 : chunk1) + bx0; myChunks[idx1 ^ 6 + sub] = chunk2 + ax0; } @@ -412,8 +412,8 @@ __global__ void cryptonight_core_gpu_phase2_double( __syncthreads( ); # endif - myChunks[idx1 ^ 2 + sub] = chunk3 + bx1; - myChunks[idx1 ^ 4 + sub] = chunk1 + bx0; + myChunks[idx1 ^ 2 + sub] = ((VARIANT == xmrig::VARIANT_RWZ) ? chunk1 : chunk3) + bx1; + myChunks[idx1 ^ 4 + sub] = ((VARIANT == xmrig::VARIANT_RWZ) ? 
chunk3 : chunk1) + bx0; myChunks[idx1 ^ 6 + sub] = chunk2 + ax0; ax0 += res; @@ -935,6 +935,10 @@ void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant varian cryptonight_core_gpu_hash(ctx, startNonce); break; + case VARIANT_RWZ: + cryptonight_core_gpu_hash(ctx, startNonce); + break; + default: break; } diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index 21a157a2..24d703a1 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -169,7 +169,7 @@ __global__ void cryptonight_extra_gpu_prepare( memcpy(d_ctx_b + thread * 16 + 4, ctx_b, 4 * 4); // r0, r1, r2, r3 memcpy(d_ctx_b + thread * 16 + 2 * 4, ctx_state + 24, 4 * 8); - } else if (VARIANT == xmrig::VARIANT_2 || VARIANT == xmrig::VARIANT_HALF) { + } else if (VARIANT == xmrig::VARIANT_2) { memcpy(d_ctx_b + thread * 16, ctx_b, 4 * 4); // bx1 XOR_BLOCKS_DST(ctx_state + 16, ctx_state + 20, ctx_b); @@ -408,7 +408,7 @@ void cryptonight_extra_cpu_prepare(nvid_ctx *ctx, uint32_t startNonce, xmrig::Al } else if (variant == xmrig::VARIANT_4) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare << > > (wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); - } else if (variant == xmrig::VARIANT_2 || variant == xmrig::VARIANT_HALF || variant == xmrig::VARIANT_TRTL) { + } else if (variant == xmrig::VARIANT_2 || variant == xmrig::VARIANT_HALF || variant == xmrig::VARIANT_TRTL || variant == xmrig::VARIANT_RWZ) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>(wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); } else { From 31b89f840efb707f8fff91b69152b0363ef24a62 Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 5 Mar 2019 14:07:48 +0700 Subject: [PATCH 5/8] Add support for embedded config. 
--- CMakeLists.txt | 8 ++- cmake/cn-gpu.cmake | 6 +-- src/common/config/ConfigLoader.cpp | 14 ++++++ src/config.json | 3 +- src/core/ConfigLoader_default.h | 80 ++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 src/core/ConfigLoader_default.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ffb73c8..fef7d491 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,8 @@ option(WITH_ASM "Enable ASM PoW implementations" ON) option(BUILD_STATIC "Build static binary" OFF) option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) -option(WITH_DEBUG_LOG "Enable debug log output, network, etc" OFF) +option(WITH_DEBUG_LOG "Enable debug log output, network, etc" OFF) +option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) include (CheckIncludeFile) include (cmake/cpu.cmake) @@ -67,6 +68,7 @@ set(HEADERS src/common/utils/mm_malloc.h src/common/utils/timestamp.h src/common/xmrig.h + src/core/ConfigLoader_default.h src/core/ConfigLoader_platform.h src/core/Controller.h src/core/usage.h @@ -219,6 +221,10 @@ if (NOT WITH_CN_PICO) add_definitions(/DXMRIG_NO_CN_PICO) endif() +if (WITH_EMBEDDED_CONFIG) + add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG) +endif() + include_directories(src) include(cmake/flags.cmake) diff --git a/cmake/cn-gpu.cmake b/cmake/cn-gpu.cmake index 81092519..a2d9943e 100644 --- a/cmake/cn-gpu.cmake +++ b/cmake/cn-gpu.cmake @@ -4,14 +4,14 @@ if (WITH_CN_GPU AND CMAKE_SIZEOF_VOID_P EQUAL 8) set(CN_GPU_SOURCES src/crypto/cn_gpu_arm.cpp) if (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang) - set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O3") + set_source_files_properties(src/crypto/cn_gpu_arm.cpp PROPERTIES COMPILE_FLAGS "-O2") endif() else() set(CN_GPU_SOURCES src/crypto/cn_gpu_avx.cpp src/crypto/cn_gpu_ssse3.cpp) if (CMAKE_CXX_COMPILER_ID MATCHES GNU) - set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES 
COMPILE_FLAGS "-O3 -mavx2") - set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O3") + set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-O2 -mavx2") + set_source_files_properties(src/crypto/cn_gpu_ssse3.cpp PROPERTIES COMPILE_FLAGS "-O2") elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) set_source_files_properties(src/crypto/cn_gpu_avx.cpp PROPERTIES COMPILE_FLAGS "-mavx2") elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) diff --git a/src/common/config/ConfigLoader.cpp b/src/common/config/ConfigLoader.cpp index fd46e791..26742e5f 100644 --- a/src/common/config/ConfigLoader.cpp +++ b/src/common/config/ConfigLoader.cpp @@ -53,6 +53,11 @@ #include "rapidjson/fwd.h" +#ifdef XMRIG_FEATURE_EMBEDDED_CONFIG +# include "core/ConfigLoader_default.h" +#endif + + xmrig::ConfigWatcher *xmrig::ConfigLoader::m_watcher = nullptr; xmrig::IConfigCreator *xmrig::ConfigLoader::m_creator = nullptr; xmrig::IConfigListener *xmrig::ConfigLoader::m_listener = nullptr; @@ -180,6 +185,15 @@ xmrig::IConfig *xmrig::ConfigLoader::load(Process *process, IConfigCreator *crea loadFromFile(config, process->location(Process::ExeLocation, "config.json")); } +# ifdef XMRIG_FEATURE_EMBEDDED_CONFIG + if (!config->finalize()) { + delete config; + + config = m_creator->create(); + loadFromJSON(config, default_config); + } +# endif + if (!config->finalize()) { if (!config->algorithm().isValid()) { fprintf(stderr, "No valid algorithm specified. 
Exiting.\n"); diff --git a/src/config.json b/src/config.json index 46894d39..edd7afa9 100644 --- a/src/config.json +++ b/src/config.json @@ -24,6 +24,7 @@ "nicehash": false, "keepalive": true, "variant": -1, + "enabled": true, "tls": false, "tls-fingerprint": null } @@ -34,5 +35,5 @@ "threads": null, "user-agent": null, "syslog": false, - "watch": false + "watch": true } \ No newline at end of file diff --git a/src/core/ConfigLoader_default.h b/src/core/ConfigLoader_default.h new file mode 100644 index 00000000..a9230965 --- /dev/null +++ b/src/core/ConfigLoader_default.h @@ -0,0 +1,80 @@ +/* XMRig + * Copyright 2010 Jeff Garzik + * Copyright 2012-2014 pooler + * Copyright 2014 Lucas Jones + * Copyright 2014-2016 Wolf9466 + * Copyright 2016 Jay D Dee + * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> + * Copyright 2018-2019 SChernykh + * Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#ifndef XMRIG_CONFIGLOADER_DEFAULT_H +#define XMRIG_CONFIGLOADER_DEFAULT_H + + +namespace xmrig { + + +#ifdef XMRIG_FEATURE_EMBEDDED_CONFIG +const static char *default_config = +R"===( +{ + "algo": "cryptonight", + "api": { + "port": 0, + "access-token": null, + "id": null, + "worker-id": null, + "ipv6": false, + "restricted": true + }, + "background": false, + "colors": true, + "cuda-bfactor": null, + "cuda-bsleep": null, + "cuda-max-threads": 64, + "donate-level": 5, + "log-file": null, + "pools": [ + { + "url": "donate.v2.xmrig.com:3333", + "user": "YOUR_WALLET", + "pass": "x", + "rig-id": null, + "nicehash": false, + "keepalive": true, + "variant": -1, + "enabled": true, + "tls": false, + "tls-fingerprint": null + } + ], + "print-time": 60, + "retries": 5, + "retry-pause": 5, + "threads": null, + "user-agent": null, + "syslog": false, + "watch": true +} +)==="; +#endif + + +} /* namespace xmrig */ + +#endif /* XMRIG_CONFIGLOADER_DEFAULT_H */ From 6adc4e6f3679c4dd3246c8121c0a9982eb3884c6 Mon Sep 17 00:00:00 2001 From: XMRig Date: Tue, 5 Mar 2019 16:17:00 +0700 Subject: [PATCH 6/8] Fix AVX detection. 
--- src/common/cpu/BasicCpuInfo.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/common/cpu/BasicCpuInfo.cpp b/src/common/cpu/BasicCpuInfo.cpp index e9018c98..d7778bdd 100644 --- a/src/common/cpu/BasicCpuInfo.cpp +++ b/src/common/cpu/BasicCpuInfo.cpp @@ -36,6 +36,10 @@ # define bit_AES (1 << 25) #endif +#ifndef bit_OSXSAVE +# define bit_OSXSAVE (1 << 27) +#endif + #ifndef bit_AVX2 # define bit_AVX2 (1 << 5) #endif @@ -107,10 +111,19 @@ static inline bool has_avx2() } +static inline bool has_ossave() +{ + int32_t cpu_info[4] = { 0 }; + cpuid(PROCESSOR_INFO, cpu_info); + + return (cpu_info[ECX_Reg] & bit_OSXSAVE) != 0; +} + + xmrig::BasicCpuInfo::BasicCpuInfo() : m_assembly(ASM_NONE), m_aes(has_aes_ni()), - m_avx2(has_avx2()), + m_avx2(has_avx2() && has_ossave()), m_brand(), m_threads(std::thread::hardware_concurrency()) { From 87cdfb82189444a3d8f5e7bec16b34a6c01b1391 Mon Sep 17 00:00:00 2001 From: XMRig Date: Wed, 6 Mar 2019 13:56:17 +0700 Subject: [PATCH 7/8] * Added "cn/zls" (Zelerius) and "cn/double" (X-CASH) algorithms. 
--- src/base/net/Pool.cpp | 4 +- src/common/crypto/Algorithm.cpp | 32 +++--- src/common/net/Job.cpp | 6 + src/common/xmrig.h | 34 +++--- src/crypto/CryptoNight.cpp | 170 ++++++++++++++++++----------- src/crypto/CryptoNight_constants.h | 40 ++++--- src/crypto/CryptoNight_test.h | 31 +++++- src/crypto/CryptoNight_x86.h | 38 +++++++ src/nvidia/cuda_core.cu | 8 ++ src/nvidia/cuda_extra.cu | 2 +- 10 files changed, 252 insertions(+), 113 deletions(-) diff --git a/src/base/net/Pool.cpp b/src/base/net/Pool.cpp index a1fd4331..9d4f2bde 100644 --- a/src/base/net/Pool.cpp +++ b/src/base/net/Pool.cpp @@ -484,7 +484,6 @@ void xmrig::Pool::rebuild() m_algorithms.push_back(m_algorithm); # ifndef XMRIG_PROXY_PROJECT - addVariant(VARIANT_RWZ); addVariant(VARIANT_4); addVariant(VARIANT_WOW); addVariant(VARIANT_2); @@ -498,6 +497,9 @@ void xmrig::Pool::rebuild() addVariant(VARIANT_XAO); addVariant(VARIANT_RTO); addVariant(VARIANT_GPU); + addVariant(VARIANT_RWZ); + addVariant(VARIANT_ZLS); + addVariant(VARIANT_DOUBLE); addVariant(VARIANT_AUTO); # endif } diff --git a/src/common/crypto/Algorithm.cpp b/src/common/crypto/Algorithm.cpp index ae7ba7c7..f14d034d 100644 --- a/src/common/crypto/Algorithm.cpp +++ b/src/common/crypto/Algorithm.cpp @@ -54,19 +54,21 @@ struct AlgoData static AlgoData const algorithms[] = { - { "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO }, - { "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 }, - { "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 }, - { "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL }, - { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR }, - { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, - { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO }, - { "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 }, - { "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/xtlv9", 
"cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, - { "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, - { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, - { "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ }, + { "cryptonight", "cn", xmrig::CRYPTONIGHT, xmrig::VARIANT_AUTO }, + { "cryptonight/0", "cn/0", xmrig::CRYPTONIGHT, xmrig::VARIANT_0 }, + { "cryptonight/1", "cn/1", xmrig::CRYPTONIGHT, xmrig::VARIANT_1 }, + { "cryptonight/xtl", "cn/xtl", xmrig::CRYPTONIGHT, xmrig::VARIANT_XTL }, + { "cryptonight/msr", "cn/msr", xmrig::CRYPTONIGHT, xmrig::VARIANT_MSR }, + { "cryptonight/xao", "cn/xao", xmrig::CRYPTONIGHT, xmrig::VARIANT_XAO }, + { "cryptonight/rto", "cn/rto", xmrig::CRYPTONIGHT, xmrig::VARIANT_RTO }, + { "cryptonight/2", "cn/2", xmrig::CRYPTONIGHT, xmrig::VARIANT_2 }, + { "cryptonight/half", "cn/half", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, + { "cryptonight/xtlv9", "cn/xtlv9", xmrig::CRYPTONIGHT, xmrig::VARIANT_HALF }, + { "cryptonight/wow", "cn/wow", xmrig::CRYPTONIGHT, xmrig::VARIANT_WOW }, + { "cryptonight/r", "cn/r", xmrig::CRYPTONIGHT, xmrig::VARIANT_4 }, + { "cryptonight/rwz", "cn/rwz", xmrig::CRYPTONIGHT, xmrig::VARIANT_RWZ }, + { "cryptonight/zls", "cn/zls", xmrig::CRYPTONIGHT, xmrig::VARIANT_ZLS }, + { "cryptonight/double", "cn/double", xmrig::CRYPTONIGHT, xmrig::VARIANT_DOUBLE }, # ifndef XMRIG_NO_AEON { "cryptonight-lite", "cn-lite", xmrig::CRYPTONIGHT_LITE, xmrig::VARIANT_AUTO }, @@ -134,7 +136,9 @@ static const char *variants[] = { "gpu", "wow", "r", - "rwz" + "rwz", + "zls", + "double" }; diff --git a/src/common/net/Job.cpp b/src/common/net/Job.cpp index 54686535..851507ad 100644 --- a/src/common/net/Job.cpp +++ b/src/common/net/Job.cpp @@ -137,6 +137,12 @@ bool xmrig::Job::setBlob(const char *blob) else if (m_algorithm.variant() == VARIANT_WOW && m_blob[0] < 11) { m_algorithm.setVariant(VARIANT_2); } + else if (m_algorithm.variant() == VARIANT_RWZ && m_blob[0] < 12) { + 
m_algorithm.setVariant(VARIANT_2); + } + else if (m_algorithm.variant() == VARIANT_ZLS && m_blob[0] < 8) { + m_algorithm.setVariant(VARIANT_2); + } } # ifdef XMRIG_PROXY_PROJECT diff --git a/src/common/xmrig.h b/src/common/xmrig.h index 575251b7..e8ca8857 100644 --- a/src/common/xmrig.h +++ b/src/common/xmrig.h @@ -61,22 +61,24 @@ enum AlgoVariant { enum Variant { - VARIANT_AUTO = -1, // Autodetect - VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy - VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7 - VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only) - VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only) - VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only) - VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only) - VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only) - VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only) - VARIANT_2 = 8, // CryptoNight variant 2 - VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite) - VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL) - VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) - VARIANT_WOW = 12, // CryptoNightR (Wownero) - VARIANT_4 = 13, // CryptoNightR (Monero's variant 4) - VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) + VARIANT_AUTO = -1, // Autodetect + VARIANT_0 = 0, // Original CryptoNight or CryptoNight-Heavy + VARIANT_1 = 1, // CryptoNight variant 1 also known as Monero7 and CryptoNightV7 + VARIANT_TUBE = 2, // Modified CryptoNight-Heavy (TUBE only) + VARIANT_XTL = 3, // Modified CryptoNight variant 1 (Stellite only) + VARIANT_MSR = 4, // Modified CryptoNight variant 1 (Masari only) + VARIANT_XHV = 5, // Modified CryptoNight-Heavy (Haven Protocol only) + VARIANT_XAO = 6, // Modified CryptoNight variant 0 (Alloy only) + VARIANT_RTO = 7, // Modified CryptoNight variant 1 (Arto only) + VARIANT_2 = 8, // CryptoNight variant 2 + 
VARIANT_HALF = 9, // CryptoNight variant 2 with half iterations (Masari/Stellite) + VARIANT_TRTL = 10, // CryptoNight Turtle (TRTL) + VARIANT_GPU = 11, // CryptoNight-GPU (Ryo) + VARIANT_WOW = 12, // CryptoNightR (Wownero) + VARIANT_4 = 13, // CryptoNightR (Monero's variant 4) + VARIANT_RWZ = 14, // CryptoNight variant 2 with 3/4 iterations and reversed shuffle operation (Graft) + VARIANT_ZLS = 15, // CryptoNight variant 2 with 3/4 iterations (Zelerius) + VARIANT_DOUBLE = 16, // CryptoNight variant 2 with double iterations (X-CASH) VARIANT_MAX }; diff --git a/src/crypto/CryptoNight.cpp b/src/crypto/CryptoNight.cpp index ed63d257..35ce910f 100644 --- a/src/crypto/CryptoNight.cpp +++ b/src/crypto/CryptoNight.cpp @@ -52,15 +52,25 @@ bool CryptoNight::hash(const xmrig::Job &job, xmrig::JobResult &result, cryptoni #ifndef XMRIG_NO_ASM -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; -xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; -xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; - -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; -xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; -xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_double_fun cn_half_double_mainloop_sandybridge_asm = nullptr; + +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; 
+xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; + +xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; + +xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; +xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; +xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm = nullptr; template static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask) @@ -98,28 +108,50 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma static void patchAsmVariants() { + using namespace xmrig; + const int allocation_size = 65536; uint8_t *base = static_cast(Mem::allocateExecutableMemory(allocation_size)); - cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); - cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); - cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); - cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); - - cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); - cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); - cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); - cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); - - patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, 
xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_HALF_ITER, xmrig::CRYPTONIGHT_MASK); - - patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); - patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, xmrig::CRYPTONIGHT_TRTL_ITER, xmrig::CRYPTONIGHT_PICO_MASK); + cn_half_mainloop_ivybridge_asm = reinterpret_cast (base + 0x0000); + cn_half_mainloop_ryzen_asm = reinterpret_cast (base + 0x1000); + cn_half_mainloop_bulldozer_asm = reinterpret_cast (base + 0x2000); + cn_half_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x3000); + + cn_trtl_mainloop_ivybridge_asm = reinterpret_cast (base + 0x4000); + cn_trtl_mainloop_ryzen_asm = reinterpret_cast (base + 0x5000); + cn_trtl_mainloop_bulldozer_asm = reinterpret_cast (base + 0x6000); + cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0x7000); + + cn_zls_mainloop_ivybridge_asm = reinterpret_cast (base + 0x8000); + cn_zls_mainloop_ryzen_asm = reinterpret_cast (base + 0x9000); + cn_zls_mainloop_bulldozer_asm = reinterpret_cast (base + 0xA000); + cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xB000); + + cn_double_mainloop_ivybridge_asm = reinterpret_cast (base + 0xC000); + cn_double_mainloop_ryzen_asm = reinterpret_cast (base + 0xD000); + cn_double_mainloop_bulldozer_asm = reinterpret_cast (base + 0xE000); + cn_double_double_mainloop_sandybridge_asm = reinterpret_cast (base + 0xF000); + + patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_HALF_ITER, 
CRYPTONIGHT_MASK); + patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK); + patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK); + patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, CRYPTONIGHT_HALF_ITER, CRYPTONIGHT_MASK); + + patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_TRTL_ITER, CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_TRTL_ITER, CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, CRYPTONIGHT_TRTL_ITER, CRYPTONIGHT_PICO_MASK); + patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, CRYPTONIGHT_TRTL_ITER, CRYPTONIGHT_PICO_MASK); + + patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_ZLS_ITER, CRYPTONIGHT_MASK); + patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_ZLS_ITER, CRYPTONIGHT_MASK); + patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, CRYPTONIGHT_ZLS_ITER, CRYPTONIGHT_MASK); + patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, CRYPTONIGHT_ZLS_ITER, CRYPTONIGHT_MASK); + + patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, CRYPTONIGHT_DOUBLE_ITER, CRYPTONIGHT_MASK); + patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, CRYPTONIGHT_DOUBLE_ITER, CRYPTONIGHT_MASK); + patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, CRYPTONIGHT_DOUBLE_ITER, CRYPTONIGHT_MASK); + patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, CRYPTONIGHT_DOUBLE_ITER, CRYPTONIGHT_MASK); Mem::protectExecutableMemory(base, allocation_size); Mem::flushInstructionCache(base, allocation_size); @@ -139,6 +171,19 @@ bool CryptoNight::init(xmrig::Algo algorithm) } 
+template +static void cryptonight_single_hash_wrapper(const uint8_t *input, size_t size, uint8_t *output, cryptonight_ctx **ctx, uint64_t height) +{ + using namespace xmrig; + +# ifdef XMRIG_NO_ASM + cryptonight_single_hash(input, size, output, ctx, height); +# else + cryptonight_single_hash_asm(input, size, output, ctx, height); +# endif +} + + CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerify av, xmrig::Variant variant) { using namespace xmrig; @@ -168,18 +213,10 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif cryptonight_single_hash, cryptonight_single_hash, -# ifdef XMRIG_NO_ASM - cryptonight_single_hash, -# else - cryptonight_single_hash_asm, -# endif + cryptonight_single_hash_wrapper, cryptonight_single_hash, -# ifdef XMRIG_NO_ASM - cryptonight_single_hash, -# else - cryptonight_single_hash_asm, -# endif + cryptonight_single_hash_wrapper, cryptonight_single_hash, nullptr, nullptr, // VARIANT_TRTL @@ -191,27 +228,21 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_GPU # endif -# ifdef XMRIG_NO_ASM - cryptonight_single_hash, -# else - cryptonight_single_hash_asm, -# endif + cryptonight_single_hash_wrapper, cryptonight_single_hash, -# ifdef XMRIG_NO_ASM - cryptonight_single_hash, -# else - cryptonight_single_hash_asm, -# endif + cryptonight_single_hash_wrapper, cryptonight_single_hash, -# ifdef XMRIG_NO_ASM - cryptonight_single_hash, -# else - cryptonight_single_hash_asm, -# endif + cryptonight_single_hash_wrapper, cryptonight_single_hash, + cryptonight_single_hash_wrapper, + cryptonight_single_hash, + + cryptonight_single_hash_wrapper, + cryptonight_single_hash, + # ifndef XMRIG_NO_AEON cryptonight_single_hash, cryptonight_single_hash, @@ -232,6 +263,8 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 nullptr, nullptr, // VARIANT_RWZ 
+ nullptr, nullptr, // VARIANT_ZLS + nullptr, nullptr, // VARIANT_DOUBLE # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -240,6 +273,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif @@ -267,6 +301,8 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 nullptr, nullptr, // VARIANT_RWZ + nullptr, nullptr, // VARIANT_ZLS + nullptr, nullptr, // VARIANT_DOUBLE # else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -275,6 +311,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif # ifndef XMRIG_NO_CN_PICO @@ -300,6 +337,8 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, // VARIANT_WOW nullptr, nullptr, // VARIANT_4 nullptr, nullptr, // VARIANT_RWZ + nullptr, nullptr, // VARIANT_ZLS + nullptr, nullptr, // VARIANT_DOUBLE #else nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, @@ -308,6 +347,7 @@ CryptoNight::cn_hash_fun CryptoNight::fn(xmrig::Algo algorithm, xmrig::AlgoVerif nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, # endif }; @@ -335,17 +375,19 @@ bool CryptoNight::selfTest() { Mem::create(&m_ctx, m_algorithm, 1); if (m_algorithm == xmrig::CRYPTONIGHT) { - const bool rc = verify(VARIANT_0, test_output_v0) && - verify(VARIANT_1, test_output_v1) && - verify(VARIANT_2, test_output_v2) && - verify(VARIANT_XTL, 
test_output_xtl) && - verify(VARIANT_MSR, test_output_msr) && - verify(VARIANT_XAO, test_output_xao) && - verify(VARIANT_RTO, test_output_rto) && - verify(VARIANT_HALF, test_output_half) && - verify2(VARIANT_WOW, test_output_wow) && - verify2(VARIANT_4, test_output_r) && - verify(VARIANT_RWZ, test_output_rwz); + const bool rc = verify(VARIANT_0, test_output_v0) && + verify(VARIANT_1, test_output_v1) && + verify(VARIANT_2, test_output_v2) && + verify(VARIANT_XTL, test_output_xtl) && + verify(VARIANT_MSR, test_output_msr) && + verify(VARIANT_XAO, test_output_xao) && + verify(VARIANT_RTO, test_output_rto) && + verify(VARIANT_HALF, test_output_half) && + verify2(VARIANT_WOW, test_output_wow) && + verify2(VARIANT_4, test_output_r) && + verify(VARIANT_RWZ, test_output_rwz) && + verify(VARIANT_ZLS, test_output_zls) && + verify(VARIANT_DOUBLE, test_output_double); # ifndef XMRIG_NO_CN_GPU if (!rc) { diff --git a/src/crypto/CryptoNight_constants.h b/src/crypto/CryptoNight_constants.h index a6623eba..58a3915f 100644 --- a/src/crypto/CryptoNight_constants.h +++ b/src/crypto/CryptoNight_constants.h @@ -42,7 +42,9 @@ constexpr const uint32_t CRYPTONIGHT_MASK = 0x1FFFF0; constexpr const uint32_t CRYPTONIGHT_ITER = 0x80000; constexpr const uint32_t CRYPTONIGHT_HALF_ITER = 0x40000; constexpr const uint32_t CRYPTONIGHT_XAO_ITER = 0x100000; +constexpr const uint32_t CRYPTONIGHT_DOUBLE_ITER = 0x100000; constexpr const uint32_t CRYPTONIGHT_WALTZ_ITER = 0x60000; +constexpr const uint32_t CRYPTONIGHT_ZLS_ITER = 0x60000; constexpr const uint32_t CRYPTONIGHT_GPU_ITER = 0xC000; constexpr const uint32_t CRYPTONIGHT_GPU_MASK = 0x1FFFC0; @@ -136,6 +138,8 @@ template<> inline constexpr uint32_t cn_select_iter() template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_GPU_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_WALTZ_ITER; } +template<> inline constexpr 
uint32_t cn_select_iter() { return CRYPTONIGHT_ZLS_ITER; } +template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_DOUBLE_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_LITE_ITER; } template<> inline constexpr uint32_t cn_select_iter() { return CRYPTONIGHT_HEAVY_ITER; } @@ -155,12 +159,14 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant) return CRYPTONIGHT_GPU_ITER; case VARIANT_RTO: + case VARIANT_DOUBLE: return CRYPTONIGHT_XAO_ITER; case VARIANT_TRTL: return CRYPTONIGHT_TRTL_ITER; case VARIANT_RWZ: + case VARIANT_ZLS: return CRYPTONIGHT_WALTZ_ITER; default: @@ -189,22 +195,24 @@ inline uint32_t cn_select_iter(Algo algorithm, Variant variant) } -template inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } -template<> inline constexpr 
Variant cn_base_variant() { return VARIANT_2; } +template inline constexpr Variant cn_base_variant() { return VARIANT_0; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_0; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_1; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_GPU; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } +template<> inline constexpr Variant cn_base_variant() { return VARIANT_2; } template inline constexpr bool cn_is_cryptonight_r() { return false; } diff --git a/src/crypto/CryptoNight_test.h b/src/crypto/CryptoNight_test.h index 4f10b221..6fa9dd28 100644 --- a/src/crypto/CryptoNight_test.h +++ b/src/crypto/CryptoNight_test.h @@ -230,6 +230,7 @@ const static uint8_t test_output_rto[160] = { 0xE7, 0x81, 0x4E, 0x2A, 0xBD, 0x62, 0xC1, 0x1B, 0x7C, 0xB9, 0x33, 0x7B, 0xEE, 0x95, 0x80, 0xB3 }; +// "cn/rwz" const static uint8_t test_output_rwz[160] = { 0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 
0x85, 0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8, @@ -240,7 +241,35 @@ const static uint8_t test_output_rwz[160] = { 0x9a, 0xe9, 0xba, 0x0c, 0xc0, 0x2b, 0x11, 0xf6, 0x9b, 0xee, 0x24, 0x3a, 0xd8, 0x86, 0x18, 0xd0, 0xe8, 0xeb, 0xcb, 0x38, 0x2c, 0xf5, 0x99, 0x83, 0x14, 0x7b, 0x0c, 0x20, 0xbe, 0x50, 0xf4, 0x87, 0x83, 0x41, 0x75, 0xd8, 0xd1, 0xdd, 0x4b, 0x73, 0xb3, 0x92, 0x8f, 0xe6, 0x1c, 0x72, 0x70, 0xf5, - 0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f, + 0x7c, 0xf6, 0x23, 0x3a, 0xb4, 0x5f, 0xdf, 0xde, 0xa6, 0x5a, 0x58, 0xec, 0x13, 0x5a, 0x23, 0x2f +}; + +// "cn/zls" +const static uint8_t test_output_zls[160] = { + 0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E, + 0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2, + 0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80, + 0x17, 0xB5, 0x1B, 0x25, 0x31, 0x39, 0x87, 0xD2, 0x2D, 0x6A, 0x9D, 0x1C, 0x74, 0xF4, 0x43, 0x22, + 0x4B, 0x97, 0x1F, 0x6A, 0xD0, 0xBE, 0x00, 0x74, 0xEC, 0xC5, 0xD8, 0x3B, 0xE6, 0xF4, 0x03, 0x8A, + 0x7B, 0xBA, 0x80, 0xCC, 0x9F, 0x00, 0xCB, 0xC2, 0x14, 0x8F, 0xF3, 0xD8, 0x92, 0x73, 0xBF, 0x17, + 0x3D, 0x9B, 0x22, 0xA3, 0x61, 0x94, 0x41, 0x9E, 0xF9, 0x68, 0x1D, 0x42, 0x48, 0x3B, 0x39, 0x45, + 0xE2, 0xE6, 0x16, 0x84, 0xFC, 0x21, 0xE6, 0xDA, 0x38, 0x7F, 0x17, 0xAB, 0xD3, 0xF2, 0xCE, 0x1A, + 0x2F, 0x35, 0xD5, 0x74, 0xFA, 0x45, 0x3B, 0x06, 0xD1, 0x4E, 0x84, 0x3A, 0x5D, 0xE3, 0x0E, 0xA5, + 0x00, 0x08, 0x64, 0xF0, 0xA6, 0xC8, 0x94, 0x45, 0x08, 0xED, 0x03, 0x95, 0x52, 0xE9, 0xBC, 0x5F +}; + +// "cn/double" +const static uint8_t test_output_double[160] = { + 0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9, + 0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21, + 0x49, 0xCE, 0x8E, 0xF3, 0xBC, 
0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8, + 0xEB, 0x3A, 0xE1, 0xDC, 0x91, 0xF7, 0x62, 0x4B, 0x9F, 0x48, 0xE6, 0x92, 0xBD, 0xE4, 0x5D, 0xC1, + 0xF1, 0x3C, 0x63, 0x1D, 0xEB, 0x0B, 0x04, 0xA3, 0x30, 0xD5, 0x11, 0x15, 0x4C, 0xCE, 0xEF, 0x4F, + 0xDF, 0x69, 0xE3, 0x9E, 0xD2, 0x68, 0xFC, 0x1B, 0x6F, 0xE8, 0x08, 0x9C, 0xBB, 0xA5, 0x2B, 0x60, + 0x52, 0x0F, 0xE5, 0xD2, 0xF3, 0x8A, 0xB3, 0xE1, 0x76, 0x7F, 0x44, 0x25, 0x76, 0xEC, 0xFF, 0xA2, + 0x0C, 0x64, 0xD0, 0x0E, 0x32, 0x33, 0x28, 0x20, 0x73, 0xE0, 0x31, 0x66, 0x4E, 0x54, 0x83, 0x49, + 0x51, 0x55, 0x4D, 0x2E, 0x22, 0xB7, 0x51, 0x09, 0x73, 0x61, 0x7E, 0x6A, 0x57, 0x0B, 0x28, 0x3C, + 0x5E, 0x2E, 0xC1, 0x80, 0x89, 0x39, 0xB3, 0x54, 0x39, 0x52, 0x0E, 0x69, 0x3D, 0xF6, 0xC5, 0x4A }; #ifndef XMRIG_NO_AEON diff --git a/src/crypto/CryptoNight_x86.h b/src/crypto/CryptoNight_x86.h index 5c4de441..8b9ea783 100644 --- a/src/crypto/CryptoNight_x86.h +++ b/src/crypto/CryptoNight_x86.h @@ -767,6 +767,16 @@ extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern xmrig::CpuThread::cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; extern xmrig::CpuThread::cn_mainloop_double_fun cn_trtl_double_mainloop_sandybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_ryzen_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; +extern xmrig::CpuThread::cn_mainloop_double_fun cn_zls_double_mainloop_sandybridge_asm; + +extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ivybridge_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_ryzen_asm; +extern xmrig::CpuThread::cn_mainloop_fun cn_double_mainloop_bulldozer_asm; +extern xmrig::CpuThread::cn_mainloop_double_fun cn_double_double_mainloop_sandybridge_asm; + void wow_compile_code(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); void v4_compile_code(const V4_Instruction* code, int code_size, 
void* machine_code, xmrig::Assembly ASM); void wow_compile_code_double(const V4_Instruction* code, int code_size, void* machine_code, xmrig::Assembly ASM); @@ -848,6 +858,28 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_ else if (VARIANT == xmrig::VARIANT_RWZ) { cnv2_rwz_mainloop_asm(ctx[0]); } + else if (VARIANT == xmrig::VARIANT_ZLS) { + if (ASM == xmrig::ASM_INTEL) { + cn_zls_mainloop_ivybridge_asm(ctx[0]); + } + else if (ASM == xmrig::ASM_RYZEN) { + cn_zls_mainloop_ryzen_asm(ctx[0]); + } + else { + cn_zls_mainloop_bulldozer_asm(ctx[0]); + } + } + else if (VARIANT == xmrig::VARIANT_DOUBLE) { + if (ASM == xmrig::ASM_INTEL) { + cn_double_mainloop_ivybridge_asm(ctx[0]); + } + else if (ASM == xmrig::ASM_RYZEN) { + cn_double_mainloop_ryzen_asm(ctx[0]); + } + else { + cn_double_mainloop_bulldozer_asm(ctx[0]); + } + } else if (xmrig::cn_is_cryptonight_r()) { ctx[0]->generated_code(ctx[0]); } @@ -889,6 +921,12 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_ else if (VARIANT == xmrig::VARIANT_RWZ) { cnv2_rwz_double_mainloop_asm(ctx[0], ctx[1]); } + else if (VARIANT == xmrig::VARIANT_ZLS) { + cn_zls_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + } + else if (VARIANT == xmrig::VARIANT_DOUBLE) { + cn_double_double_mainloop_sandybridge_asm(ctx[0], ctx[1]); + } else if (xmrig::cn_is_cryptonight_r()) { ctx[0]->generated_code_double(ctx[0], ctx[1]); } diff --git a/src/nvidia/cuda_core.cu b/src/nvidia/cuda_core.cu index a80d2de5..8e32ef59 100644 --- a/src/nvidia/cuda_core.cu +++ b/src/nvidia/cuda_core.cu @@ -939,6 +939,14 @@ void cryptonight_gpu_hash(nvid_ctx *ctx, xmrig::Algo algo, xmrig::Variant varian cryptonight_core_gpu_hash(ctx, startNonce); break; + case VARIANT_ZLS: + cryptonight_core_gpu_hash(ctx, startNonce); + break; + + case VARIANT_DOUBLE: + cryptonight_core_gpu_hash(ctx, startNonce); + break; + default: break; } diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index 
24d703a1..7c53ad49 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -408,7 +408,7 @@ void cryptonight_extra_cpu_prepare(nvid_ctx *ctx, uint32_t startNonce, xmrig::Al } else if (variant == xmrig::VARIANT_4) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare << > > (wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); - } else if (variant == xmrig::VARIANT_2 || variant == xmrig::VARIANT_HALF || variant == xmrig::VARIANT_TRTL || variant == xmrig::VARIANT_RWZ) { + } else if (variant == xmrig::VARIANT_2 || variant == xmrig::VARIANT_HALF || variant == xmrig::VARIANT_TRTL || variant == xmrig::VARIANT_RWZ || variant == xmrig::VARIANT_ZLS || variant == xmrig::VARIANT_DOUBLE) { CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_extra_gpu_prepare<<>>(wsize, ctx->d_input, ctx->inputlen, startNonce, ctx->d_ctx_state, ctx->d_ctx_state2, ctx->d_ctx_a, ctx->d_ctx_b, ctx->d_ctx_key1, ctx->d_ctx_key2)); } else { From 4d2c5a15fd991acf2db0908227c3710ae0c79342 Mon Sep 17 00:00:00 2001 From: xmrig Date: Wed, 6 Mar 2019 18:10:46 +0700 Subject: [PATCH 8/8] Update CHANGELOG.md --- CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d4e42fe..1631d888 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# v2.14.0 +- **[#242](https://github.com/xmrig/xmrig-nvidia/pull/242) Added new algorithm `cryptonight/rwz`, short alias `cn/rwz` (also known as CryptoNight ReverseWaltz), for upcoming [Graft](https://www.graft.network/) fork.** +- **[#931](https://github.com/xmrig/xmrig/issues/931) Added new algorithm `cryptonight/zls`, short alias `cn/zls` for [Zelerius Network](https://zelerius.org) fork.** +- **[#940](https://github.com/xmrig/xmrig/issues/940) Added new algorithm `cryptonight/double`, short alias `cn/double` (also known as CryptoNight HeavyX), for [X-CASH](https://x-cash.org/).** +- 
[#241](https://github.com/xmrig/xmrig-nvidia/issues/241) Minimum required CUDA version increased to 8.0 because CUDA 7.5 lacks support for `nvrtcAddNameExpression`. +- [#951](https://github.com/xmrig/xmrig/issues/951#issuecomment-469581529) Fixed crash if AVX was disabled at the OS level. +- [#952](https://github.com/xmrig/xmrig/issues/952) Fixed compile error on some Linux systems. +- [#957](https://github.com/xmrig/xmrig/issues/957#issuecomment-468890667) Added support for embedded config. + # v2.13.0 - **[#938](https://github.com/xmrig/xmrig/issues/938) Added support for new algorithm `cryptonight/r`, short alias `cn/r` (also known as CryptoNightR or CryptoNight variant 4), for upcoming [Monero](https://www.getmonero.org/) fork on March 9, thanks [@SChernykh](https://github.com/SChernykh).** - [#939](https://github.com/xmrig/xmrig/issues/939) Added support for dynamic (runtime) pools reload.