diff --git a/src/Node_verify.cpp b/src/Node_verify.cpp index 0d491d08e..fab6cf8c4 100644 --- a/src/Node_verify.cpp +++ b/src/Node_verify.cpp @@ -317,10 +317,24 @@ void Node::verify_vdf(std::shared_ptr proof, const uint32_t c max_iters = std::max(max_iters, segments[i].num_iters); } if(have_sha_ni) { - for(uint32_t j = 0; j < num_lanes; ++j) + for(uint32_t j = 0; j < num_lanes; j += 2) { const uint32_t i = chunk * batch_size + j; - recursive_sha256_ni(point[j].data(), segments[i].num_iters); + const uint32_t k = (j + 1 < num_lanes) ? 1 : 0; + const auto num_iters_0 = segments[i].num_iters; + const auto num_iters_1 = segments[i + k].num_iters; + const auto max_iters = std::max(num_iters_0, num_iters_1); + const auto min_iters = std::min(num_iters_0, num_iters_1); + uint8_t hashx2[32 * 2]; + + ::memcpy(hashx2, point[j].data(), 32); + ::memcpy(hashx2 + 32, point[j + k].data(), 32); + recursive_sha256_ni_x2(hashx2, min_iters); + if(num_iters_0 != num_iters_1) { + recursive_sha256_ni(hashx2 + ((num_iters_0 > num_iters_1) ? 0 : 32), max_iters - min_iters); + } + ::memcpy(point[j].data(), hashx2, 32); + ::memcpy(point[j + k].data(), hashx2 + 32, 32); } } else { for(uint32_t k = 0; k < max_iters; ++k)