diff --git a/WifSolverCuda/Worker.cuh b/WifSolverCuda/Worker.cuh index c521e51..c9fe6fd 100644 --- a/WifSolverCuda/Worker.cuh +++ b/WifSolverCuda/Worker.cuh @@ -7,10 +7,10 @@ #include "lib/Math.cuh" -__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks); -__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks); -__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks, const uint32_t checksum); -__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks, const uint32_t checksum); +__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks); +__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks); +__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks, const uint32_t checksum); +__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks, const uint32_t checksum); __global__ void resultCollector(bool* buffResult, uint64_t* buffCombinedResult, const uint64_t threadsInBlockNumberOfChecks); __device__ bool _checksumDoubleSha256CheckUncompressed(unsigned int checksum, beu32* d_hash, uint64_t* _start); @@ -21,4 +21,6 @@ __device__ bool _checksumDoubleSha256(unsigned int checksum, beu32* d_hash); __device__ void _add(uint64_t* C, uint64_t* A); __device__ void _load(uint64_t* C, uint64_t* A); -__device__ void IMult(uint64_t* r, uint64_t* a, int64_t b); \ No newline at end of file +__device__ void IMult(uint64_t* r, uint64_t* a, int64_t b); + +cudaError_t loadStride(uint64_t* stride); \ No newline at end of file diff --git a/WifSolverCuda/Worker1.cu b/WifSolverCuda/Worker1.cu index fd1e58e..3927421 100644 --- a/WifSolverCuda/Worker1.cu +++ b/WifSolverCuda/Worker1.cu @@ -1,12 +1,12 @@ #include "Worker.cuh" -__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks, const uint32_t checksum) { - uint64_t _stride[5]; +__device__ __constant__ uint64_t _stride[5]; + +__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks, const uint32_t checksum) { uint64_t _start[5]; uint64_t _startStride[5]; beu32 d_hash[8]; _load(_start, buffRangeStart); - _load(_stride, buffStride); int64_t tIx = (threadIdx.x + blockIdx.x * blockDim.x) * threadNumberOfChecks; IMult(_startStride, _stride, tIx); @@ -19,13 +19,11 @@ __global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, ui _add(_start, _stride); } } -__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks, const uint32_t checksum) { - uint64_t _stride[5]; +__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks, const uint32_t checksum) { uint64_t _start[5]; uint64_t _startStride[5]; beu32 d_hash[8]; _load(_start, buffRangeStart); - _load(_stride, buffStride); int64_t tIx = (threadIdx.x + blockIdx.x * blockDim.x) * threadNumberOfChecks; IMult(_startStride, _stride, tIx); @@ -42,13 +40,11 @@ __global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint _add(_start, _stride); } } -__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks) { - uint64_t _stride[5]; +__global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks) { uint64_t _start[5]; uint64_t _startStride[5]; beu32 d_hash[8]; _load(_start, buffRangeStart); - _load(_stride, buffStride); int64_t tIx = (threadIdx.x + blockIdx.x * blockDim.x) * threadNumberOfChecks; IMult(_startStride, _stride, tIx); @@ -61,13 +57,11 @@ __global__ void kernelUncompressed(bool* buffResult, bool* buffCollectorWork, ui _add(_start, _stride); } } -__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* buffRangeStart, uint64_t* buffStride, const int threadNumberOfChecks) { - uint64_t _stride[5]; +__global__ void kernelCompressed(bool* buffResult, bool* buffCollectorWork, uint64_t* const __restrict__ buffRangeStart, const int threadNumberOfChecks) { uint64_t _start[5]; uint64_t _startStride[5]; beu32 d_hash[8]; _load(_start, buffRangeStart); - _load(_stride, buffStride); int64_t tIx = (threadIdx.x + blockIdx.x * blockDim.x) * threadNumberOfChecks; IMult(_startStride, _stride, tIx); @@ -194,3 +188,6 @@ __device__ void IMult(uint64_t* r, uint64_t* a, int64_t b) { Mult2(r, t, b) } +cudaError_t loadStride(uint64_t* stride){ + return cudaMemcpyToSymbol(_stride, stride, 5 * sizeof(uint64_t)); +} \ No newline at end of file diff --git a/WifSolverCuda/lib/util.cpp b/WifSolverCuda/lib/util.cpp index 942053b..ec6295d 100644 --- a/WifSolverCuda/lib/util.cpp +++ b/WifSolverCuda/lib/util.cpp @@ -189,11 +189,15 @@ void addressToBase58(char* rmd, char* dst) { memcpy(digest + 1, rmd, 20); sha256((uint8_t*)digest, 21, (uint8_t*)digest + 21); sha256((uint8_t*)digest + 21, 32, (uint8_t*)digest + 21); - if (!b58enc(dst, &pubaddress_size, digest, 25)) { + if (!b58encode(dst, &pubaddress_size, digest, 25)) { fprintf(stderr, "error b58enc\n"); } } +bool b58encode(char* b58, size_t* b58sz, const void* data, size_t binsz) { + return b58enc(b58, b58sz, data, binsz); +} + std::string formatDouble(const char* formatStr, double value) { char buf[100] = { 0 }; diff --git a/WifSolverCuda/lib/util.h b/WifSolverCuda/lib/util.h index c645711..aa5248a 100644 --- a/WifSolverCuda/lib/util.h +++ b/WifSolverCuda/lib/util.h @@ -36,4 +36,6 @@ void addressToBase58(char* rmd, char* dst); std::string formatDouble(const char* formatStr, double value); +bool b58encode(char* b58, size_t* b58sz, const void* data, size_t binsz); + #endif // CUSTOMUTILH diff --git a/WifSolverCuda/main.cu b/WifSolverCuda/main.cu index c0a4199..3e02314 100644 --- a/WifSolverCuda/main.cu +++ b/WifSolverCuda/main.cu @@ -57,7 +57,7 @@ Secp256K1* secp; int main(int argc, char** argv) { - printf("WifSolver 0.4.0\n\n"); + printf("WifSolver 0.4.1\n\n"); if (readArgs(argc, argv)) { showHelp(); @@ -98,11 +98,11 @@ cudaError_t processCuda() { uint64_t* buffRangeStart = new uint64_t[NB64BLOCK]; uint64_t* dev_buffRangeStart = new uint64_t[NB64BLOCK]; uint64_t* buffStride = new uint64_t[NB64BLOCK]; - uint64_t* dev_buffStride = new uint64_t[NB64BLOCK]; int COLLECTOR_SIZE = BLOCK_NUMBER; __Load(buffStride, STRIDE.bits64); + loadStride(buffStride); bool* buffDeviceResult = new bool[outputSize]; bool* dev_buffDeviceResult = new bool[outputSize]; @@ -110,10 +110,7 @@ cudaError_t processCuda() { buffDeviceResult[i] = false; } cudaStatus = cudaMalloc((void**)&dev_buffDeviceResult, outputSize * sizeof(bool)); - cudaStatus = cudaMemcpyAsync(dev_buffDeviceResult, buffDeviceResult, outputSize * sizeof(bool), cudaMemcpyHostToDevice); - - cudaStatus = cudaMalloc((void**)&dev_buffStride, NB64BLOCK * sizeof(uint64_t)); - cudaStatus = cudaMemcpy(dev_buffStride, buffStride, NB64BLOCK * sizeof(uint64_t), cudaMemcpyHostToDevice); + cudaStatus = cudaMemcpyAsync(dev_buffDeviceResult, buffDeviceResult, outputSize * sizeof(bool), cudaMemcpyHostToDevice); uint64_t* buffResult = new uint64_t[COLLECTOR_SIZE]; uint64_t* dev_buffResult = new uint64_t[COLLECTOR_SIZE]; @@ -143,16 +140,16 @@ cudaError_t processCuda() { //launch work if (COMPRESSED) { if (IS_CHECKSUM) { - kernelCompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, dev_buffStride, THREAD_STEPS, expectedChecksum); + kernelCompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, THREAD_STEPS, expectedChecksum); }else{ - kernelCompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, dev_buffStride, THREAD_STEPS); + kernelCompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, THREAD_STEPS); } } else { if (IS_CHECKSUM) { - kernelUncompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, dev_buffStride, THREAD_STEPS, expectedChecksum); + kernelUncompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, THREAD_STEPS, expectedChecksum); }else{ - kernelUncompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, dev_buffStride, THREAD_STEPS); + kernelUncompressed << > > (dev_buffDeviceResult, dev_buffCollectorWork, dev_buffRangeStart, THREAD_STEPS); } } @@ -255,7 +252,6 @@ Error: cudaFree(dev_buffResult); cudaFree(dev_buffDeviceResult); cudaFree(dev_buffRangeStart); - cudaFree(dev_buffStride); cudaFree(dev_buffCollectorWork); return cudaStatus; } @@ -263,7 +259,13 @@ Error: void processCandidate(Int &toTest) { FILE* keys; - char rmdhash[21], address[50]; + size_t dataLen = COMPRESSED ? 38 : 37; + size_t wifLen = 53; + char rmdhash[21], address[50], wif[53]; + unsigned char* buff = new unsigned char[dataLen]; + for (int i = 0, d=dataLen-1; i < dataLen; i++, d--) { + buff[i] = toTest.GetByte(d); + } toTest.SetBase16((char*)toTest.GetBase16().substr(2, 64).c_str()); Point publickey = secp->ComputePublicKey(&toTest); secp->GetHash160(P2PKH, COMPRESSED, publickey, (unsigned char*)rmdhash); @@ -274,9 +276,13 @@ void processCandidate(Int &toTest) { printf("\n"); printf("found: %s\n", address); printf("key : %s\n", toTest.GetBase16().c_str()); + if (b58encode(wif, &wifLen, buff, dataLen)) { + printf("WIF : %s\n", wif); + } keys = fopen(fileResult.c_str(), "a+"); fprintf(keys, "%s\n", address); - fprintf(keys, "%s\n", toTest.GetBase16().c_str()); + fprintf(keys, "%s\n", wif); + fprintf(keys, "%s\n\n", toTest.GetBase16().c_str()); fclose(keys); return; } @@ -285,9 +291,13 @@ void processCandidate(Int &toTest) { printf("\n"); printf("found: %s\n", address); printf("key : %s\n", toTest.GetBase16().c_str()); + if (b58encode(wif, &wifLen, buff, dataLen)) { + printf("WIF : %s\n", wif); + } keys = fopen(fileResultPartial.c_str(), "a+"); fprintf(keys, "%s\n", address); - fprintf(keys, "%s\n", toTest.GetBase16().c_str()); + fprintf(keys, "%s\n", wif); + fprintf(keys, "%s\n\n", toTest.GetBase16().c_str()); fclose(keys); } }